| Index: google.patch
|
| diff --git a/google.patch b/google.patch
|
| index f12156b4eecc5f5d219844cce2c54c60b1e6d4f3..fcb53b443086670ad83f4abbac4187bb530ffeca 100644
|
| --- a/google.patch
|
| +++ b/google.patch
|
| @@ -1,174 +1,9613 @@
|
| -Index: jdmarker.c
|
| +Index: README
|
| ===================================================================
|
| ---- jdmarker.c (revision 829)
|
| -+++ jdmarker.c (working copy)
|
| -@@ -910,7 +910,7 @@
|
| +--- README (revision 829)
|
| ++++ README (working copy)
|
| +@@ -1,26 +1,26 @@
|
| ++libjpeg-turbo note: This file has been modified by The libjpeg-turbo Project
|
| ++to include only information relevant to libjpeg-turbo, to wordsmith certain
|
| ++sections, and to remove impolitic language that existed in the libjpeg v8
|
| ++README. It is included only for reference. Please see README-turbo.txt for
|
| ++information specific to libjpeg-turbo.
|
| ++
|
| ++
|
| + The Independent JPEG Group's JPEG software
|
| + ==========================================
|
| +
|
| +-README for release 6b of 27-Mar-1998
|
| +-====================================
|
| ++This distribution contains a release of the Independent JPEG Group's free JPEG
|
| ++software. You are welcome to redistribute this software and to use it for any
|
| ++purpose, subject to the conditions under LEGAL ISSUES, below.
|
| +
|
| +-This distribution contains the sixth public release of the Independent JPEG
|
| +-Group's free JPEG software. You are welcome to redistribute this software and
|
| +-to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
|
| ++This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
|
| ++Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
|
| ++Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers,
|
| ++and other members of the Independent JPEG Group.
|
| +
|
| +-Serious users of this software (particularly those incorporating it into
|
| +-larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to
|
| +-our electronic mailing list. Mailing list members are notified of updates
|
| +-and have a chance to participate in technical discussions, etc.
|
| ++IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
|
| ++(also known as JPEG, together with ITU-T SG16).
|
| +
|
| +-This software is the work of Tom Lane, Philip Gladstone, Jim Boucher,
|
| +-Lee Crocker, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi,
|
| +-Guido Vollbeding, Ge' Weijers, and other members of the Independent JPEG
|
| +-Group.
|
| +
|
| +-IJG is not affiliated with the official ISO JPEG standards committee.
|
| +-
|
| +-
|
| + DOCUMENTATION ROADMAP
|
| + =====================
|
| +
|
| +@@ -30,7 +30,6 @@
|
| + LEGAL ISSUES Copyright, lack of warranty, terms of distribution.
|
| + REFERENCES Where to learn more about JPEG.
|
| + ARCHIVE LOCATIONS Where to find newer versions of this software.
|
| +-RELATED SOFTWARE Other stuff you should get.
|
| + FILE FORMAT WARS Software *not* to get.
|
| + TO DO Plans for future IJG releases.
|
| +
|
| +@@ -37,20 +36,19 @@
|
| + Other documentation files in the distribution are:
|
| +
|
| + User documentation:
|
| +- install.doc How to configure and install the IJG software.
|
| +- usage.doc Usage instructions for cjpeg, djpeg, jpegtran,
|
| ++ install.txt How to configure and install the IJG software.
|
| ++ usage.txt Usage instructions for cjpeg, djpeg, jpegtran,
|
| + rdjpgcom, and wrjpgcom.
|
| +- *.1 Unix-style man pages for programs (same info as usage.doc).
|
| +- wizard.doc Advanced usage instructions for JPEG wizards only.
|
| ++ *.1 Unix-style man pages for programs (same info as usage.txt).
|
| ++ wizard.txt Advanced usage instructions for JPEG wizards only.
|
| + change.log Version-to-version change highlights.
|
| + Programmer and internal documentation:
|
| +- libjpeg.doc How to use the JPEG library in your own programs.
|
| ++ libjpeg.txt How to use the JPEG library in your own programs.
|
| + example.c Sample code for calling the JPEG library.
|
| +- structure.doc Overview of the JPEG library's internal structure.
|
| +- filelist.doc Road map of IJG files.
|
| +- coderules.doc Coding style rules --- please read if you contribute code.
|
| ++ structure.txt Overview of the JPEG library's internal structure.
|
| ++ coderules.txt Coding style rules --- please read if you contribute code.
|
| +
|
| +-Please read at least the files install.doc and usage.doc. Useful information
|
| ++Please read at least the files install.txt and usage.txt. Some information
|
| + can also be found in the JPEG FAQ (Frequently Asked Questions) article. See
|
| + ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
|
| +
|
| +@@ -62,24 +60,27 @@
|
| + OVERVIEW
|
| + ========
|
| +
|
| +-This package contains C software to implement JPEG image compression and
|
| +-decompression. JPEG (pronounced "jay-peg") is a standardized compression
|
| +-method for full-color and gray-scale images. JPEG is intended for compressing
|
| +-"real-world" scenes; line drawings, cartoons and other non-realistic images
|
| +-are not its strong suit. JPEG is lossy, meaning that the output image is not
|
| +-exactly identical to the input image. Hence you must not use JPEG if you
|
| +-have to have identical output bits. However, on typical photographic images,
|
| +-very good compression levels can be obtained with no visible change, and
|
| +-remarkably high compression levels are possible if you can tolerate a
|
| +-low-quality image. For more details, see the references, or just experiment
|
| +-with various compression settings.
|
| ++This package contains C software to implement JPEG image encoding, decoding,
|
| ++and transcoding. JPEG (pronounced "jay-peg") is a standardized compression
|
| ++method for full-color and gray-scale images. JPEG's strong suit is compressing
|
| ++photographic images or other types of images that have smooth color and
|
| ++brightness transitions between neighboring pixels. Images with sharp lines or
|
| ++other abrupt features may not compress well with JPEG, and a higher JPEG
|
| ++quality may have to be used to avoid visible compression artifacts with such
|
| ++images.
|
| +
|
| ++JPEG is lossy, meaning that the output pixels are not necessarily identical to
|
| ++the input pixels. However, on photographic content and other "smooth" images,
|
| ++very good compression ratios can be obtained with no visible compression
|
| ++artifacts, and extremely high compression ratios are possible if you are
|
| ++willing to sacrifice image quality (by reducing the "quality" setting in the
|
| ++compressor).
|
| ++
|
| + This software implements JPEG baseline, extended-sequential, and progressive
|
| + compression processes. Provision is made for supporting all variants of these
|
| + processes, although some uncommon parameter settings aren't implemented yet.
|
| +-For legal reasons, we are not distributing code for the arithmetic-coding
|
| +-variants of JPEG; see LEGAL ISSUES. We have made no provision for supporting
|
| +-the hierarchical or lossless processes defined in the standard.
|
| ++We have made no provision for supporting the hierarchical or lossless
|
| ++processes defined in the standard.
|
| +
|
| + We provide a set of library routines for reading and writing JPEG image files,
|
| + plus two sample applications "cjpeg" and "djpeg", which use the library to
|
| +@@ -91,11 +92,12 @@
|
| + for example, the color quantization modules are not strictly part of JPEG
|
| + decoding, but they are essential for output to colormapped file formats or
|
| + colormapped displays. These extra functions can be compiled out of the
|
| +-library if not required for a particular application. We have also included
|
| +-"jpegtran", a utility for lossless transcoding between different JPEG
|
| +-processes, and "rdjpgcom" and "wrjpgcom", two simple applications for
|
| +-inserting and extracting textual comments in JFIF files.
|
| ++library if not required for a particular application.
|
| +
|
| ++We have also included "jpegtran", a utility for lossless transcoding between
|
| ++different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
|
| ++applications for inserting and extracting textual comments in JFIF files.
|
| ++
|
| + The emphasis in designing this software has been on achieving portability and
|
| + flexibility, while also making it fast enough to be useful. In particular,
|
| + the software is not intended to be read as a tutorial on JPEG. (See the
|
| +@@ -127,7 +129,7 @@
|
| + fitness for a particular purpose. This software is provided "AS IS", and you,
|
| + its user, assume the entire risk as to its quality and accuracy.
|
| +
|
| +-This software is copyright (C) 1991-1998, Thomas G. Lane.
|
| ++This software is copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
|
| + All Rights Reserved except as specified below.
|
| +
|
| + Permission is hereby granted to use, copy, modify, and distribute this
|
| +@@ -158,30 +160,12 @@
|
| + assumed by the product vendor.
|
| +
|
| +
|
| +-ansi2knr.c is included in this distribution by permission of L. Peter Deutsch,
|
| +-sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA.
|
| +-ansi2knr.c is NOT covered by the above copyright and conditions, but instead
|
| +-by the usual distribution terms of the Free Software Foundation; principally,
|
| +-that you must include source code if you redistribute it. (See the file
|
| +-ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part
|
| +-of any program generated from the IJG code, this does not limit you more than
|
| +-the foregoing paragraphs do.
|
| +-
|
| + The Unix configuration script "configure" was produced with GNU Autoconf.
|
| + It is copyright by the Free Software Foundation but is freely distributable.
|
| + The same holds for its supporting scripts (config.guess, config.sub,
|
| +-ltconfig, ltmain.sh). Another support script, install-sh, is copyright
|
| +-by M.I.T. but is also freely distributable.
|
| ++ltmain.sh). Another support script, install-sh, is copyright by X Consortium
|
| ++but is also freely distributable.
|
| +
|
| +-It appears that the arithmetic coding option of the JPEG spec is covered by
|
| +-patents owned by IBM, AT&T, and Mitsubishi. Hence arithmetic coding cannot
|
| +-legally be used without obtaining one or more licenses. For this reason,
|
| +-support for arithmetic coding has been removed from the free JPEG software.
|
| +-(Since arithmetic coding provides only a marginal gain over the unpatented
|
| +-Huffman mode, it is unlikely that very many implementations will support it.)
|
| +-So far as we are aware, there are no patent restrictions on the remaining
|
| +-code.
|
| +-
|
| + The IJG distribution formerly included code to read and write GIF files.
|
| + To avoid entanglement with the Unisys LZW patent, GIF reading support has
|
| + been removed altogether, and the GIF writer has been simplified to produce
|
| +@@ -198,7 +182,7 @@
|
| + REFERENCES
|
| + ==========
|
| +
|
| +-We highly recommend reading one or more of these references before trying to
|
| ++We recommend reading one or more of these references before trying to
|
| + understand the innards of the JPEG software.
|
| +
|
| + The best short technical introduction to the JPEG compression algorithm is
|
| +@@ -207,7 +191,7 @@
|
| + (Adjacent articles in that issue discuss MPEG motion picture compression,
|
| + applications of JPEG, and related topics.) If you don't have the CACM issue
|
| + handy, a PostScript file containing a revised version of Wallace's article is
|
| +-available at ftp://ftp.uu.net/graphics/jpeg/wallace.ps.gz. The file (actually
|
| ++available at http://www.ijg.org/files/wallace.ps.gz. The file (actually
|
| + a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
|
| + omits the sample images that appeared in CACM, but it includes corrections
|
| + and some added material. Note: the Wallace article is copyright ACM and IEEE,
|
| +@@ -222,45 +206,29 @@
|
| + sample code is far from industrial-strength, but when you are ready to look
|
| + at a full implementation, you've got one here...
|
| +
|
| +-The best full description of JPEG is the textbook "JPEG Still Image Data
|
| +-Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published
|
| +-by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. Price US$59.95, 638 pp.
|
| +-The book includes the complete text of the ISO JPEG standards (DIS 10918-1
|
| +-and draft DIS 10918-2). This is by far the most complete exposition of JPEG
|
| +-in existence, and we highly recommend it.
|
| ++The best currently available description of JPEG is the textbook "JPEG Still
|
| ++Image Data Compression Standard" by William B. Pennebaker and Joan L.
|
| ++Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
|
| ++Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG
|
| ++standards (DIS 10918-1 and draft DIS 10918-2).
|
| +
|
| +-The JPEG standard itself is not available electronically; you must order a
|
| +-paper copy through ISO or ITU. (Unless you feel a need to own a certified
|
| +-official copy, we recommend buying the Pennebaker and Mitchell book instead;
|
| +-it's much cheaper and includes a great deal of useful explanatory material.)
|
| +-In the USA, copies of the standard may be ordered from ANSI Sales at (212)
|
| +-642-4900, or from Global Engineering Documents at (800) 854-7179. (ANSI
|
| +-doesn't take credit card orders, but Global does.) It's not cheap: as of
|
| +-1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7%
|
| +-shipping/handling. The standard is divided into two parts, Part 1 being the
|
| +-actual specification, while Part 2 covers compliance testing methods. Part 1
|
| +-is titled "Digital Compression and Coding of Continuous-tone Still Images,
|
| ++The original JPEG standard is divided into two parts, Part 1 being the actual
|
| ++specification, while Part 2 covers compliance testing methods. Part 1 is
|
| ++titled "Digital Compression and Coding of Continuous-tone Still Images,
|
| + Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
|
| + 10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of
|
| + Continuous-tone Still Images, Part 2: Compliance testing" and has document
|
| + numbers ISO/IEC IS 10918-2, ITU-T T.83.
|
| +
|
| +-Some extensions to the original JPEG standard are defined in JPEG Part 3,
|
| +-a newer ISO standard numbered ISO/IEC IS 10918-3 and ITU-T T.84. IJG
|
| +-currently does not support any Part 3 extensions.
|
| +-
|
| + The JPEG standard does not specify all details of an interchangeable file
|
| + format. For the omitted details we follow the "JFIF" conventions, revision
|
| +-1.02. A copy of the JFIF spec is available from:
|
| +- Literature Department
|
| +- C-Cube Microsystems, Inc.
|
| +- 1778 McCarthy Blvd.
|
| +- Milpitas, CA 95035
|
| +- phone (408) 944-6300, fax (408) 944-6314
|
| +-A PostScript version of this document is available by FTP at
|
| +-ftp://ftp.uu.net/graphics/jpeg/jfif.ps.gz. There is also a plain text
|
| +-version at ftp://ftp.uu.net/graphics/jpeg/jfif.txt.gz, but it is missing
|
| +-the figures.
|
| ++1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report
|
| ++and thus received a formal publication status. It is available as a free
|
| ++download in PDF format from
|
| ++http://www.ecma-international.org/publications/techreports/E-TR-098.htm.
|
| ++A PostScript version of the JFIF document is available at
|
| ++http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at
|
| ++http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures.
|
| +
|
| + The TIFF 6.0 file format specification can be obtained by FTP from
|
| + ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme
|
| +@@ -267,37 +235,24 @@
|
| + found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
|
| + IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
|
| + Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
|
| +-(Compression tag 7). Copies of this Note can be obtained from ftp.sgi.com or
|
| +-from ftp://ftp.uu.net/graphics/jpeg/. It is expected that the next revision
|
| ++(Compression tag 7). Copies of this Note can be obtained from
|
| ++http://www.ijg.org/files/. It is expected that the next revision
|
| + of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
|
| + Although IJG's own code does not support TIFF/JPEG, the free libtiff library
|
| +-uses our library to implement TIFF/JPEG per the Note. libtiff is available
|
| +-from ftp://ftp.sgi.com/graphics/tiff/.
|
| ++uses our library to implement TIFF/JPEG per the Note.
|
| +
|
| +
|
| + ARCHIVE LOCATIONS
|
| + =================
|
| +
|
| +-The "official" archive site for this software is ftp.uu.net (Internet
|
| +-address 192.48.96.9). The most recent released version can always be found
|
| +-there in directory graphics/jpeg. This particular version will be archived
|
| +-as ftp://ftp.uu.net/graphics/jpeg/jpegsrc.v6b.tar.gz. If you don't have
|
| +-direct Internet access, UUNET's archives are also available via UUCP; contact
|
| +-help@uunet.uu.net for information on retrieving files that way.
|
| ++The "official" archive site for this software is www.ijg.org.
|
| ++The most recent released version can always be found there in
|
| ++directory "files". This particular version will be archived as
|
| ++http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible
|
| ++"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip.
|
| +
|
| +-Numerous Internet sites maintain copies of the UUNET files. However, only
|
| +-ftp.uu.net is guaranteed to have the latest official version.
|
| +-
|
| +-You can also obtain this software in DOS-compatible "zip" archive format from
|
| +-the SimTel archives (ftp://ftp.simtel.net/pub/simtelnet/msdos/graphics/), or
|
| +-on CompuServe in the Graphics Support forum (GO CIS:GRAPHSUP), library 12
|
| +-"JPEG Tools". Again, these versions may sometimes lag behind the ftp.uu.net
|
| +-release.
|
| +-
|
| +-The JPEG FAQ (Frequently Asked Questions) article is a useful source of
|
| +-general information about JPEG. It is updated constantly and therefore is
|
| +-not included in this distribution. The FAQ is posted every two weeks to
|
| +-Usenet newsgroups comp.graphics.misc, news.answers, and other groups.
|
| ++The JPEG FAQ (Frequently Asked Questions) article is a source of some
|
| ++general information about JPEG.
|
| + It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
|
| + and other news.answers archive sites, including the official news.answers
|
| + archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
|
| +@@ -307,79 +262,21 @@
|
| + send usenet/news.answers/jpeg-faq/part2
|
| +
|
| +
|
| +-RELATED SOFTWARE
|
| +-================
|
| +-
|
| +-Numerous viewing and image manipulation programs now support JPEG. (Quite a
|
| +-few of them use this library to do so.) The JPEG FAQ described above lists
|
| +-some of the more popular free and shareware viewers, and tells where to
|
| +-obtain them on Internet.
|
| +-
|
| +-If you are on a Unix machine, we highly recommend Jef Poskanzer's free
|
| +-PBMPLUS software, which provides many useful operations on PPM-format image
|
| +-files. In particular, it can convert PPM images to and from a wide range of
|
| +-other formats, thus making cjpeg/djpeg considerably more useful. The latest
|
| +-version is distributed by the NetPBM group, and is available from numerous
|
| +-sites, notably ftp://wuarchive.wustl.edu/graphics/graphics/packages/NetPBM/.
|
| +-Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software is;
|
| +-you are likely to have difficulty making it work on any non-Unix machine.
|
| +-
|
| +-A different free JPEG implementation, written by the PVRG group at Stanford,
|
| +-is available from ftp://havefun.stanford.edu/pub/jpeg/. This program
|
| +-is designed for research and experimentation rather than production use;
|
| +-it is slower, harder to use, and less portable than the IJG code, but it
|
| +-is easier to read and modify. Also, the PVRG code supports lossless JPEG,
|
| +-which we do not. (On the other hand, it doesn't do progressive JPEG.)
|
| +-
|
| +-
|
| + FILE FORMAT WARS
|
| + ================
|
| +
|
| +-Some JPEG programs produce files that are not compatible with our library.
|
| +-The root of the problem is that the ISO JPEG committee failed to specify a
|
| +-concrete file format. Some vendors "filled in the blanks" on their own,
|
| +-creating proprietary formats that no one else could read. (For example, none
|
| +-of the early commercial JPEG implementations for the Macintosh were able to
|
| +-exchange compressed files.)
|
| ++The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together
|
| ++with ITU-T SG16) currently promotes different formats containing the name
|
| ++"JPEG" which are incompatible with original DCT-based JPEG. IJG therefore does
|
| ++not support these formats (see REFERENCES). Indeed, one of the original
|
| ++reasons for developing this free software was to help force convergence on
|
| ++common, interoperable format standards for JPEG files.
|
| ++Don't use an incompatible file format!
|
| ++(In any case, our decoder will remain capable of reading existing JPEG
|
| ++image files indefinitely.)
|
| +
|
| +-The file format we have adopted is called JFIF (see REFERENCES). This format
|
| +-has been agreed to by a number of major commercial JPEG vendors, and it has
|
| +-become the de facto standard. JFIF is a minimal or "low end" representation.
|
| +-We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF
|
| +-Technical Note #2) for "high end" applications that need to record a lot of
|
| +-additional data about an image. TIFF/JPEG is fairly new and not yet widely
|
| +-supported, unfortunately.
|
| +
|
| +-The upcoming JPEG Part 3 standard defines a file format called SPIFF.
|
| +-SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should
|
| +-be able to read the most common variant of SPIFF. SPIFF has some technical
|
| +-advantages over JFIF, but its major claim to fame is simply that it is an
|
| +-official standard rather than an informal one. At this point it is unclear
|
| +-whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto
|
| +-standard. IJG intends to support SPIFF once the standard is frozen, but we
|
| +-have not decided whether it should become our default output format or not.
|
| +-(In any case, our decoder will remain capable of reading JFIF indefinitely.)
|
| +-
|
| +-Various proprietary file formats incorporating JPEG compression also exist.
|
| +-We have little or no sympathy for the existence of these formats. Indeed,
|
| +-one of the original reasons for developing this free software was to help
|
| +-force convergence on common, open format standards for JPEG files. Don't
|
| +-use a proprietary file format!
|
| +-
|
| +-
|
| + TO DO
|
| + =====
|
| +
|
| +-The major thrust for v7 will probably be improvement of visual quality.
|
| +-The current method for scaling the quantization tables is known not to be
|
| +-very good at low Q values. We also intend to investigate block boundary
|
| +-smoothing, "poor man's variable quantization", and other means of improving
|
| +-quality-vs-file-size performance without sacrificing compatibility.
|
| +-
|
| +-In future versions, we are considering supporting some of the upcoming JPEG
|
| +-Part 3 extensions --- principally, variable quantization and the SPIFF file
|
| +-format.
|
| +-
|
| +-As always, speeding things up is of great interest.
|
| +-
|
| +-Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net.
|
| ++Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
|
| +Index: bmp.c
|
| +===================================================================
|
| +--- bmp.c (revision 829)
|
| ++++ bmp.c (working copy)
|
| +@@ -1,370 +1,274 @@
|
| +-/* Copyright (C)2004 Landmark Graphics Corporation
|
| +- * Copyright (C)2005 Sun Microsystems, Inc.
|
| ++/*
|
| ++ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
|
| + *
|
| +- * This library is free software and may be redistributed and/or modified under
|
| +- * the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
| +- * any later version. The full license is in the LICENSE.txt file included
|
| +- * with this distribution.
|
| ++ * Redistribution and use in source and binary forms, with or without
|
| ++ * modification, are permitted provided that the following conditions are met:
|
| + *
|
| +- * This library is distributed in the hope that it will be useful,
|
| +- * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| +- * wxWindows Library License for more details.
|
| +-*/
|
| ++ * - Redistributions of source code must retain the above copyright notice,
|
| ++ * this list of conditions and the following disclaimer.
|
| ++ * - Redistributions in binary form must reproduce the above copyright notice,
|
| ++ * this list of conditions and the following disclaimer in the documentation
|
| ++ * and/or other materials provided with the distribution.
|
| ++ * - Neither the name of the libjpeg-turbo Project nor the names of its
|
| ++ * contributors may be used to endorse or promote products derived from this
|
| ++ * software without specific prior written permission.
|
| ++ *
|
| ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
|
| ++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
| ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
| ++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| ++ * POSSIBILITY OF SUCH DAMAGE.
|
| ++ */
|
| +
|
| +-#include <fcntl.h>
|
| +-#include <sys/types.h>
|
| +-#include <sys/stat.h>
|
| +-#include <errno.h>
|
| +-#include <stdlib.h>
|
| + #include <stdio.h>
|
| + #include <string.h>
|
| +-#ifdef _WIN32
|
| +- #include <io.h>
|
| +-#else
|
| +- #include <unistd.h>
|
| +-#endif
|
| +-#include "./rrutil.h"
|
| +-#include "./bmp.h"
|
| ++#include <setjmp.h>
|
| ++#include <errno.h>
|
| ++#include "cdjpeg.h"
|
| ++#include <jpeglib.h>
|
| ++#include <jpegint.h>
|
| ++#include "tjutil.h"
|
| ++#include "bmp.h"
|
| +
|
| +-#ifndef BI_BITFIELDS
|
| +-#define BI_BITFIELDS 3L
|
| +-#endif
|
| +-#ifndef BI_RGB
|
| +-#define BI_RGB 0L
|
| +-#endif
|
| +
|
| +-#define BMPHDRSIZE 54
|
| +-typedef struct _bmphdr
|
| +-{
|
| +- unsigned short bfType;
|
| +- unsigned int bfSize;
|
| +- unsigned short bfReserved1, bfReserved2;
|
| +- unsigned int bfOffBits;
|
| ++/* This duplicates the functionality of the VirtualGL bitmap library using
|
| ++ the components from cjpeg and djpeg */
|
| +
|
| +- unsigned int biSize;
|
| +- int biWidth, biHeight;
|
| +- unsigned short biPlanes, biBitCount;
|
| +- unsigned int biCompression, biSizeImage;
|
| +- int biXPelsPerMeter, biYPelsPerMeter;
|
| +- unsigned int biClrUsed, biClrImportant;
|
| +-} bmphdr;
|
| +
|
| +-static const char *__bmperr="No error";
|
| ++/* Error handling (based on example in example.c) */
|
| +
|
| +-static const int ps[BMPPIXELFORMATS]={3, 4, 3, 4, 4, 4};
|
| +-static const int roffset[BMPPIXELFORMATS]={0, 0, 2, 2, 3, 1};
|
| +-static const int goffset[BMPPIXELFORMATS]={1, 1, 1, 1, 2, 2};
|
| +-static const int boffset[BMPPIXELFORMATS]={2, 2, 0, 0, 1, 3};
|
| ++static char errStr[JMSG_LENGTH_MAX]="No error";
|
| +
|
| +-#define _throw(m) {__bmperr=m; retcode=-1; goto finally;}
|
| +-#define _unix(f) {if((f)==-1) _throw(strerror(errno));}
|
| +-#define _catch(f) {if((f)==-1) {retcode=-1; goto finally;}}
|
| ++struct my_error_mgr
|
| ++{
|
| ++ struct jpeg_error_mgr pub;
|
| ++ jmp_buf setjmp_buffer;
|
| ++};
|
| ++typedef struct my_error_mgr *my_error_ptr;
|
| +
|
| +-#define readme(fd, addr, size) \
|
| +- if((bytesread=read(fd, addr, (size)))==-1) _throw(strerror(errno)); \
|
| +- if(bytesread!=(size)) _throw("Read error");
|
| +-
|
| +-void pixelconvert(unsigned char *srcbuf, enum BMPPIXELFORMAT srcformat,
|
| +- int srcpitch, unsigned char *dstbuf, enum BMPPIXELFORMAT dstformat, int dstpitch,
|
| +- int w, int h, int flip)
|
| ++static void my_error_exit(j_common_ptr cinfo)
|
| + {
|
| +- unsigned char *srcptr, *srcptr0, *dstptr, *dstptr0;
|
| +- int i, j;
|
| +-
|
| +- srcptr=flip? &srcbuf[srcpitch*(h-1)]:srcbuf;
|
| +- for(j=0, dstptr=dstbuf; j<h; j++,
|
| +- srcptr+=flip? -srcpitch:srcpitch, dstptr+=dstpitch)
|
| +- {
|
| +- for(i=0, srcptr0=srcptr, dstptr0=dstptr; i<w; i++,
|
| +- srcptr0+=ps[srcformat], dstptr0+=ps[dstformat])
|
| +- {
|
| +- dstptr0[roffset[dstformat]]=srcptr0[roffset[srcformat]];
|
| +- dstptr0[goffset[dstformat]]=srcptr0[goffset[srcformat]];
|
| +- dstptr0[boffset[dstformat]]=srcptr0[boffset[srcformat]];
|
| +- }
|
| +- }
|
| ++ my_error_ptr myerr=(my_error_ptr)cinfo->err;
|
| ++ (*cinfo->err->output_message)(cinfo);
|
| ++ longjmp(myerr->setjmp_buffer, 1);
|
| + }
|
| +
|
| +-int loadppm(int *fd, unsigned char **buf, int *w, int *h,
|
| +- enum BMPPIXELFORMAT f, int align, int dstbottomup, int ascii)
|
| ++/* Based on output_message() in jerror.c */
|
| ++
|
| ++static void my_output_message(j_common_ptr cinfo)
|
| + {
|
| +- FILE *fs=NULL; int retcode=0, scalefactor, dstpitch;
|
| +- unsigned char *tempbuf=NULL; char temps[255], temps2[255];
|
| +- int numread=0, totalread=0, pixel[3], i, j;
|
| ++ (*cinfo->err->format_message)(cinfo, errStr);
|
| ++}
|
| +
|
| +- if((fs=fdopen(*fd, "r"))==NULL) _throw(strerror(errno));
|
| ++#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \
|
| ++ retval=-1; goto bailout;}
|
| ++#define _throwunix(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, \
|
| ++ strerror(errno)); retval=-1; goto bailout;}
|
| +
|
| +- do
|
| +- {
|
| +- if(!fgets(temps, 255, fs)) _throw("Read error");
|
| +- if(strlen(temps)==0 || temps[0]=='\n') continue;
|
| +- if(sscanf(temps, "%s", temps2)==1 && temps2[1]=='#') continue;
|
| +- switch(totalread)
|
| +- {
|
| +- case 0:
|
| +- if((numread=sscanf(temps, "%d %d %d", w, h, &scalefactor))==EOF)
|
| +- _throw("Read error");
|
| +- break;
|
| +- case 1:
|
| +- if((numread=sscanf(temps, "%d %d", h, &scalefactor))==EOF)
|
| +- _throw("Read error");
|
| +- break;
|
| +- case 2:
|
| +- if((numread=sscanf(temps, "%d", &scalefactor))==EOF)
|
| +- _throw("Read error");
|
| +- break;
|
| +- }
|
| +- totalread+=numread;
|
| +- } while(totalread<3);
|
| +- if((*w)<1 || (*h)<1 || scalefactor<1) _throw("Corrupt PPM header");
|
| +
|
| +- dstpitch=(((*w)*ps[f])+(align-1))&(~(align-1));
|
| +- if((*buf=(unsigned char *)malloc(dstpitch*(*h)))==NULL)
|
| +- _throw("Memory allocation error");
|
| +- if(ascii)
|
| ++static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup,
|
| ++ unsigned char *dstbuf, int dstpf, int dstbottomup, int w, int h)
|
| ++{
|
| ++ unsigned char *srcptr=srcbuf, *srcptr2;
|
| ++ int srcps=tjPixelSize[srcpf];
|
| ++ int srcstride=srcbottomup? -w*srcps:w*srcps;
|
| ++ unsigned char *dstptr=dstbuf, *dstptr2;
|
| ++ int dstps=tjPixelSize[dstpf];
|
| ++ int dststride=dstbottomup? -w*dstps:w*dstps;
|
| ++ int row, col;
|
| ++
|
| ++ if(srcbottomup) srcptr=&srcbuf[w*srcps*(h-1)];
|
| ++ if(dstbottomup) dstptr=&dstbuf[w*dstps*(h-1)];
|
| ++ for(row=0; row<h; row++, srcptr+=srcstride, dstptr+=dststride)
|
| + {
|
| +- for(j=0; j<*h; j++)
|
| ++ for(col=0, srcptr2=srcptr, dstptr2=dstptr; col<w; col++, srcptr2+=srcps,
|
| ++ dstptr2+=dstps)
|
| + {
|
| +- for(i=0; i<*w; i++)
|
| +- {
|
| +- if(fscanf(fs, "%d%d%d", &pixel[0], &pixel[1], &pixel[2])!=3)
|
| +- _throw("Read error");
|
| +- (*buf)[j*dstpitch+i*ps[f]+roffset[f]]=(unsigned char)(pixel[0]*255/scalefactor);
|
| +- (*buf)[j*dstpitch+i*ps[f]+goffset[f]]=(unsigned char)(pixel[1]*255/scalefactor);
|
| +- (*buf)[j*dstpitch+i*ps[f]+boffset[f]]=(unsigned char)(pixel[2]*255/scalefactor);
|
| +- }
|
| ++ dstptr2[tjRedOffset[dstpf]]=srcptr2[tjRedOffset[srcpf]];
|
| ++ dstptr2[tjGreenOffset[dstpf]]=srcptr2[tjGreenOffset[srcpf]];
|
| ++ dstptr2[tjBlueOffset[dstpf]]=srcptr2[tjBlueOffset[srcpf]];
|
| + }
|
| + }
|
| +- else
|
| +- {
|
| +- if(scalefactor!=255)
|
| +- _throw("Binary PPMs must have 8-bit components");
|
| +- if((tempbuf=(unsigned char *)malloc((*w)*(*h)*3))==NULL)
|
| +- _throw("Memory allocation error");
|
| +- if(fread(tempbuf, (*w)*(*h)*3, 1, fs)!=1) _throw("Read error");
|
| +- pixelconvert(tempbuf, BMP_RGB, (*w)*3, *buf, f, dstpitch, *w, *h, dstbottomup);
|
| +- }
|
| +-
|
| +- finally:
|
| +- if(fs) {fclose(fs); *fd=-1;}
|
| +- if(tempbuf) free(tempbuf);
|
| +- return retcode;
|
| + }
|
| +
|
| +
|
| + int loadbmp(char *filename, unsigned char **buf, int *w, int *h,
|
| +- enum BMPPIXELFORMAT f, int align, int dstbottomup)
|
| ++ int dstpf, int bottomup)
|
| + {
|
| +- int fd=-1, bytesread, srcpitch, srcbottomup=1, srcps, dstpitch,
|
| +- retcode=0;
|
| +- unsigned char *tempbuf=NULL;
|
| +- bmphdr bh; int flags=O_RDONLY;
|
| ++ int retval=0, dstps, srcpf, tempc;
|
| ++ struct jpeg_compress_struct cinfo;
|
| ++ struct my_error_mgr jerr;
|
| ++ cjpeg_source_ptr src;
|
| ++ FILE *file=NULL;
|
| +
|
| +- dstbottomup=dstbottomup? 1:0;
|
| +- #ifdef _WIN32
|
| +- flags|=O_BINARY;
|
| +- #endif
|
| +- if(!filename || !buf || !w || !h || f<0 || f>BMPPIXELFORMATS-1 || align<1)
|
| +- _throw("invalid argument to loadbmp()");
|
| +- if((align&(align-1))!=0)
|
| +- _throw("Alignment must be a power of 2");
|
| +- _unix(fd=open(filename, flags));
|
| ++ memset(&cinfo, 0, sizeof(struct jpeg_compress_struct));
|
| +
|
| +- readme(fd, &bh.bfType, sizeof(unsigned short));
|
| +- if(!littleendian()) bh.bfType=byteswap16(bh.bfType);
|
| ++ if(!filename || !buf || !w || !h || dstpf<0 || dstpf>=TJ_NUMPF)
|
| ++ _throw("loadbmp(): Invalid argument");
|
| +
|
| +- if(bh.bfType==0x3650)
|
| ++ if((file=fopen(filename, "rb"))==NULL)
|
| ++ _throwunix("loadbmp(): Cannot open input file");
|
| ++
|
| ++ cinfo.err=jpeg_std_error(&jerr.pub);
|
| ++ jerr.pub.error_exit=my_error_exit;
|
| ++ jerr.pub.output_message=my_output_message;
|
| ++
|
| ++ if(setjmp(jerr.setjmp_buffer))
|
| + {
|
| +- _catch(loadppm(&fd, buf, w, h, f, align, dstbottomup, 0));
|
| +- goto finally;
|
| ++ /* If we get here, the JPEG code has signaled an error. */
|
| ++ retval=-1; goto bailout;
|
| + }
|
| +- if(bh.bfType==0x3350)
|
| +- {
|
| +- _catch(loadppm(&fd, buf, w, h, f, align, dstbottomup, 1));
|
| +- goto finally;
|
| +- }
|
| +
|
| +- readme(fd, &bh.bfSize, sizeof(unsigned int));
|
| +- readme(fd, &bh.bfReserved1, sizeof(unsigned short));
|
| +- readme(fd, &bh.bfReserved2, sizeof(unsigned short));
|
| +- readme(fd, &bh.bfOffBits, sizeof(unsigned int));
|
| +- readme(fd, &bh.biSize, sizeof(unsigned int));
|
| +- readme(fd, &bh.biWidth, sizeof(int));
|
| +- readme(fd, &bh.biHeight, sizeof(int));
|
| +- readme(fd, &bh.biPlanes, sizeof(unsigned short));
|
| +- readme(fd, &bh.biBitCount, sizeof(unsigned short));
|
| +- readme(fd, &bh.biCompression, sizeof(unsigned int));
|
| +- readme(fd, &bh.biSizeImage, sizeof(unsigned int));
|
| +- readme(fd, &bh.biXPelsPerMeter, sizeof(int));
|
| +- readme(fd, &bh.biYPelsPerMeter, sizeof(int));
|
| +- readme(fd, &bh.biClrUsed, sizeof(unsigned int));
|
| +- readme(fd, &bh.biClrImportant, sizeof(unsigned int));
|
| ++ jpeg_create_compress(&cinfo);
|
| ++ if((tempc=getc(file))<0 || ungetc(tempc, file)==EOF)
|
| ++ _throwunix("loadbmp(): Could not read input file")
|
| ++ else if(tempc==EOF) _throw("loadbmp(): Input file contains no data");
|
| +
|
| +- if(!littleendian())
|
| ++ if(tempc=='B')
|
| + {
|
| +- bh.bfSize=byteswap(bh.bfSize);
|
| +- bh.bfOffBits=byteswap(bh.bfOffBits);
|
| +- bh.biSize=byteswap(bh.biSize);
|
| +- bh.biWidth=byteswap(bh.biWidth);
|
| +- bh.biHeight=byteswap(bh.biHeight);
|
| +- bh.biPlanes=byteswap16(bh.biPlanes);
|
| +- bh.biBitCount=byteswap16(bh.biBitCount);
|
| +- bh.biCompression=byteswap(bh.biCompression);
|
| +- bh.biSizeImage=byteswap(bh.biSizeImage);
|
| +- bh.biXPelsPerMeter=byteswap(bh.biXPelsPerMeter);
|
| +- bh.biYPelsPerMeter=byteswap(bh.biYPelsPerMeter);
|
| +- bh.biClrUsed=byteswap(bh.biClrUsed);
|
| +- bh.biClrImportant=byteswap(bh.biClrImportant);
|
| ++ if((src=jinit_read_bmp(&cinfo))==NULL)
|
| ++ _throw("loadbmp(): Could not initialize bitmap loader");
|
| + }
|
| ++ else if(tempc=='P')
|
| ++ {
|
| ++ if((src=jinit_read_ppm(&cinfo))==NULL)
|
| ++ _throw("loadbmp(): Could not initialize bitmap loader");
|
| ++ }
|
| ++ else _throw("loadbmp(): Unsupported file type");
|
| +
|
| +- if(bh.bfType!=0x4d42 || bh.bfOffBits<BMPHDRSIZE
|
| +- || bh.biWidth<1 || bh.biHeight==0)
|
| +- _throw("Corrupt bitmap header");
|
| +- if((bh.biBitCount!=24 && bh.biBitCount!=32) || bh.biCompression!=BI_RGB)
|
| +- _throw("Only uncompessed RGB bitmaps are supported");
|
| ++ src->input_file=file;
|
| ++ (*src->start_input)(&cinfo, src);
|
| ++ (*cinfo.mem->realize_virt_arrays)((j_common_ptr)&cinfo);
|
| +
|
| +- *w=bh.biWidth; *h=bh.biHeight; srcps=bh.biBitCount/8;
|
| +- if(*h<0) {*h=-(*h); srcbottomup=0;}
|
| +- srcpitch=(((*w)*srcps)+3)&(~3);
|
| +- dstpitch=(((*w)*ps[f])+(align-1))&(~(align-1));
|
| ++ *w=cinfo.image_width; *h=cinfo.image_height;
|
| +
|
| +- if(srcpitch*(*h)+bh.bfOffBits!=bh.bfSize) _throw("Corrupt bitmap header");
|
| +- if((tempbuf=(unsigned char *)malloc(srcpitch*(*h)))==NULL
|
| +- || (*buf=(unsigned char *)malloc(dstpitch*(*h)))==NULL)
|
| +- _throw("Memory allocation error");
|
| +- if(lseek(fd, (long)bh.bfOffBits, SEEK_SET)!=(long)bh.bfOffBits)
|
| +- _throw(strerror(errno));
|
| +- _unix(bytesread=read(fd, tempbuf, srcpitch*(*h)));
|
| +- if(bytesread!=srcpitch*(*h)) _throw("Read error");
|
| ++ if(cinfo.input_components==1 && cinfo.in_color_space==JCS_RGB)
|
| ++ srcpf=TJPF_GRAY;
|
| ++ else srcpf=TJPF_RGB;
|
| +
|
| +- pixelconvert(tempbuf, BMP_BGR, srcpitch, *buf, f, dstpitch, *w, *h,
|
| +- srcbottomup!=dstbottomup);
|
| ++ dstps=tjPixelSize[dstpf];
|
| ++ if((*buf=(unsigned char *)malloc((*w)*(*h)*dstps))==NULL)
|
| ++ _throw("loadbmp(): Memory allocation failure");
|
| +
|
| +- finally:
|
| +- if(tempbuf) free(tempbuf);
|
| +- if(fd!=-1) close(fd);
|
| +- return retcode;
|
| ++ while(cinfo.next_scanline<cinfo.image_height)
|
| ++ {
|
| ++ int i, nlines=(*src->get_pixel_rows)(&cinfo, src);
|
| ++ for(i=0; i<nlines; i++)
|
| ++ {
|
| ++ unsigned char *outbuf; int row;
|
| ++ row=cinfo.next_scanline+i;
|
| ++ if(bottomup) outbuf=&(*buf)[((*h)-row-1)*(*w)*dstps];
|
| ++ else outbuf=&(*buf)[row*(*w)*dstps];
|
| ++ pixelconvert(src->buffer[i], srcpf, 0, outbuf, dstpf, bottomup, *w,
|
| ++ nlines);
|
| ++ }
|
| ++ cinfo.next_scanline+=nlines;
|
| ++ }
|
| ++
|
| ++ (*src->finish_input)(&cinfo, src);
|
| ++
|
| ++ bailout:
|
| ++ jpeg_destroy_compress(&cinfo);
|
| ++ if(file) fclose(file);
|
| ++ if(retval<0 && buf && *buf) {free(*buf); *buf=NULL;}
|
| ++ return retval;
|
| + }
|
| +
|
| +-#define writeme(fd, addr, size) \
|
| +- if((byteswritten=write(fd, addr, (size)))==-1) _throw(strerror(errno)); \
|
| +- if(byteswritten!=(size)) _throw("Write error");
|
| +
|
| +-int saveppm(char *filename, unsigned char *buf, int w, int h,
|
| +- enum BMPPIXELFORMAT f, int srcpitch, int srcbottomup)
|
| ++int savebmp(char *filename, unsigned char *buf, int w, int h, int srcpf,
|
| ++ int bottomup)
|
| + {
|
| +- FILE *fs=NULL; int retcode=0;
|
| +- unsigned char *tempbuf=NULL;
|
| ++ int retval=0, srcps, dstpf;
|
| ++ struct jpeg_decompress_struct dinfo;
|
| ++ struct my_error_mgr jerr;
|
| ++ djpeg_dest_ptr dst;
|
| ++ FILE *file=NULL;
|
| ++ char *ptr=NULL;
|
| +
|
| +- if((fs=fopen(filename, "wb"))==NULL) _throw(strerror(errno));
|
| +- if(fprintf(fs, "P6\n")<1) _throw("Write error");
|
| +- if(fprintf(fs, "%d %d\n", w, h)<1) _throw("Write error");
|
| +- if(fprintf(fs, "255\n")<1) _throw("Write error");
|
| ++ memset(&dinfo, 0, sizeof(struct jpeg_decompress_struct));
|
| +
|
| +- if((tempbuf=(unsigned char *)malloc(w*h*3))==NULL)
|
| +- _throw("Memory allocation error");
|
| ++ if(!filename || !buf || w<1 || h<1 || srcpf<0 || srcpf>=TJ_NUMPF)
|
| ++ _throw("savebmp(): Invalid argument");
|
| +
|
| +- pixelconvert(buf, f, srcpitch, tempbuf, BMP_RGB, w*3, w, h,
|
| +- srcbottomup);
|
| ++ if((file=fopen(filename, "wb"))==NULL)
|
| ++ _throwunix("savebmp(): Cannot open output file");
|
| +
|
| +- if((fwrite(tempbuf, w*h*3, 1, fs))!=1) _throw("Write error");
|
| ++ dinfo.err=jpeg_std_error(&jerr.pub);
|
| ++ jerr.pub.error_exit=my_error_exit;
|
| ++ jerr.pub.output_message=my_output_message;
|
| +
|
| +- finally:
|
| +- if(tempbuf) free(tempbuf);
|
| +- if(fs) fclose(fs);
|
| +- return retcode;
|
| +-}
|
| ++ if(setjmp(jerr.setjmp_buffer))
|
| ++ {
|
| ++ /* If we get here, the JPEG code has signaled an error. */
|
| ++ retval=-1; goto bailout;
|
| ++ }
|
| +
|
| +-int savebmp(char *filename, unsigned char *buf, int w, int h,
|
| +- enum BMPPIXELFORMAT f, int srcpitch, int srcbottomup)
|
| +-{
|
| +- int fd=-1, byteswritten, dstpitch, retcode=0;
|
| +- int flags=O_RDWR|O_CREAT|O_TRUNC;
|
| +- unsigned char *tempbuf=NULL; char *temp;
|
| +- bmphdr bh; int mode;
|
| ++ jpeg_create_decompress(&dinfo);
|
| ++ if(srcpf==TJPF_GRAY)
|
| ++ {
|
| ++ dinfo.out_color_components=dinfo.output_components=1;
|
| ++ dinfo.out_color_space=JCS_GRAYSCALE;
|
| ++ }
|
| ++ else
|
| ++ {
|
| ++ dinfo.out_color_components=dinfo.output_components=3;
|
| ++ dinfo.out_color_space=JCS_RGB;
|
| ++ }
|
| ++ dinfo.image_width=w; dinfo.image_height=h;
|
| ++ dinfo.global_state=DSTATE_READY;
|
| ++ dinfo.scale_num=dinfo.scale_denom=1;
|
| +
|
| +- #ifdef _WIN32
|
| +- flags|=O_BINARY; mode=_S_IREAD|_S_IWRITE;
|
| +- #else
|
| +- mode=S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
|
| +- #endif
|
| +- if(!filename || !buf || w<1 || h<1 || f<0 || f>BMPPIXELFORMATS-1 || srcpitch<0)
|
| +- _throw("bad argument to savebmp()");
|
| +-
|
| +- if(srcpitch==0) srcpitch=w*ps[f];
|
| +-
|
| +- if((temp=strrchr(filename, '.'))!=NULL)
|
| ++ ptr=strrchr(filename, '.');
|
| ++ if(ptr && !strcasecmp(ptr, ".bmp"))
|
| + {
|
| +- if(!stricmp(temp, ".ppm"))
|
| +- return saveppm(filename, buf, w, h, f, srcpitch, srcbottomup);
|
| ++ if((dst=jinit_write_bmp(&dinfo, 0))==NULL)
|
| ++ _throw("savebmp(): Could not initialize bitmap writer");
|
| + }
|
| ++ else
|
| ++ {
|
| ++ if((dst=jinit_write_ppm(&dinfo))==NULL)
|
| ++ _throw("savebmp(): Could not initialize PPM writer");
|
| ++ }
|
| +
|
| +- _unix(fd=open(filename, flags, mode));
|
| +- dstpitch=((w*3)+3)&(~3);
|
| ++ dst->output_file=file;
|
| ++ (*dst->start_output)(&dinfo, dst);
|
| ++ (*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo);
|
| +
|
| +- bh.bfType=0x4d42;
|
| +- bh.bfSize=BMPHDRSIZE+dstpitch*h;
|
| +- bh.bfReserved1=0; bh.bfReserved2=0;
|
| +- bh.bfOffBits=BMPHDRSIZE;
|
| +- bh.biSize=40;
|
| +- bh.biWidth=w; bh.biHeight=h;
|
| +- bh.biPlanes=0; bh.biBitCount=24;
|
| +- bh.biCompression=BI_RGB; bh.biSizeImage=0;
|
| +- bh.biXPelsPerMeter=0; bh.biYPelsPerMeter=0;
|
| +- bh.biClrUsed=0; bh.biClrImportant=0;
|
| ++ if(srcpf==TJPF_GRAY) dstpf=srcpf;
|
| ++ else dstpf=TJPF_RGB;
|
| ++ srcps=tjPixelSize[srcpf];
|
| +
|
| +- if(!littleendian())
|
| ++ while(dinfo.output_scanline<dinfo.output_height)
|
| + {
|
| +- bh.bfType=byteswap16(bh.bfType);
|
| +- bh.bfSize=byteswap(bh.bfSize);
|
| +- bh.bfOffBits=byteswap(bh.bfOffBits);
|
| +- bh.biSize=byteswap(bh.biSize);
|
| +- bh.biWidth=byteswap(bh.biWidth);
|
| +- bh.biHeight=byteswap(bh.biHeight);
|
| +- bh.biPlanes=byteswap16(bh.biPlanes);
|
| +- bh.biBitCount=byteswap16(bh.biBitCount);
|
| +- bh.biCompression=byteswap(bh.biCompression);
|
| +- bh.biSizeImage=byteswap(bh.biSizeImage);
|
| +- bh.biXPelsPerMeter=byteswap(bh.biXPelsPerMeter);
|
| +- bh.biYPelsPerMeter=byteswap(bh.biYPelsPerMeter);
|
| +- bh.biClrUsed=byteswap(bh.biClrUsed);
|
| +- bh.biClrImportant=byteswap(bh.biClrImportant);
|
| ++ int i, nlines=dst->buffer_height;
|
| ++ for(i=0; i<nlines; i++)
|
| ++ {
|
| ++ unsigned char *inbuf; int row;
|
| ++ row=dinfo.output_scanline+i;
|
| ++ if(bottomup) inbuf=&buf[(h-row-1)*w*srcps];
|
| ++ else inbuf=&buf[row*w*srcps];
|
| ++ pixelconvert(inbuf, srcpf, bottomup, dst->buffer[i], dstpf, 0, w,
|
| ++ nlines);
|
| ++ }
|
| ++ (*dst->put_pixel_rows)(&dinfo, dst, nlines);
|
| ++ dinfo.output_scanline+=nlines;
|
| + }
|
| +
|
| +- writeme(fd, &bh.bfType, sizeof(unsigned short));
|
| +- writeme(fd, &bh.bfSize, sizeof(unsigned int));
|
| +- writeme(fd, &bh.bfReserved1, sizeof(unsigned short));
|
| +- writeme(fd, &bh.bfReserved2, sizeof(unsigned short));
|
| +- writeme(fd, &bh.bfOffBits, sizeof(unsigned int));
|
| +- writeme(fd, &bh.biSize, sizeof(unsigned int));
|
| +- writeme(fd, &bh.biWidth, sizeof(int));
|
| +- writeme(fd, &bh.biHeight, sizeof(int));
|
| +- writeme(fd, &bh.biPlanes, sizeof(unsigned short));
|
| +- writeme(fd, &bh.biBitCount, sizeof(unsigned short));
|
| +- writeme(fd, &bh.biCompression, sizeof(unsigned int));
|
| +- writeme(fd, &bh.biSizeImage, sizeof(unsigned int));
|
| +- writeme(fd, &bh.biXPelsPerMeter, sizeof(int));
|
| +- writeme(fd, &bh.biYPelsPerMeter, sizeof(int));
|
| +- writeme(fd, &bh.biClrUsed, sizeof(unsigned int));
|
| +- writeme(fd, &bh.biClrImportant, sizeof(unsigned int));
|
| ++ (*dst->finish_output)(&dinfo, dst);
|
| +
|
| +- if((tempbuf=(unsigned char *)malloc(dstpitch*h))==NULL)
|
| +- _throw("Memory allocation error");
|
| +-
|
| +- pixelconvert(buf, f, srcpitch, tempbuf, BMP_BGR, dstpitch, w, h,
|
| +- !srcbottomup);
|
| +-
|
| +- if((byteswritten=write(fd, tempbuf, dstpitch*h))!=dstpitch*h)
|
| +- _throw(strerror(errno));
|
| +-
|
| +- finally:
|
| +- if(tempbuf) free(tempbuf);
|
| +- if(fd!=-1) close(fd);
|
| +- return retcode;
|
| ++ bailout:
|
| ++ jpeg_destroy_decompress(&dinfo);
|
| ++ if(file) fclose(file);
|
| ++ return retval;
|
| + }
|
| +
|
| + const char *bmpgeterr(void)
|
| + {
|
| +- return __bmperr;
|
| ++ return errStr;
|
| + }
|
| +Index: bmp.h
|
| +===================================================================
|
| +--- bmp.h (revision 829)
|
| ++++ bmp.h (working copy)
|
| +@@ -1,48 +1,42 @@
|
| +-/* Copyright (C)2004 Landmark Graphics Corporation
|
| +- * Copyright (C)2005 Sun Microsystems, Inc.
|
| ++/*
|
| ++ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
|
| + *
|
| +- * This library is free software and may be redistributed and/or modified under
|
| +- * the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
| +- * any later version. The full license is in the LICENSE.txt file included
|
| +- * with this distribution.
|
| ++ * Redistribution and use in source and binary forms, with or without
|
| ++ * modification, are permitted provided that the following conditions are met:
|
| + *
|
| +- * This library is distributed in the hope that it will be useful,
|
| +- * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| +- * wxWindows Library License for more details.
|
| +-*/
|
| ++ * - Redistributions of source code must retain the above copyright notice,
|
| ++ * this list of conditions and the following disclaimer.
|
| ++ * - Redistributions in binary form must reproduce the above copyright notice,
|
| ++ * this list of conditions and the following disclaimer in the documentation
|
| ++ * and/or other materials provided with the distribution.
|
| ++ * - Neither the name of the libjpeg-turbo Project nor the names of its
|
| ++ * contributors may be used to endorse or promote products derived from this
|
| ++ * software without specific prior written permission.
|
| ++ *
|
| ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
|
| ++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
| ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
| ++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| ++ * POSSIBILITY OF SUCH DAMAGE.
|
| ++ */
|
| +
|
| +-// This provides rudimentary facilities for loading and saving true color
|
| +-// BMP and PPM files
|
| +-
|
| + #ifndef __BMP_H__
|
| + #define __BMP_H__
|
| +
|
| +-#define BMPPIXELFORMATS 6
|
| +-enum BMPPIXELFORMAT {BMP_RGB=0, BMP_RGBA, BMP_BGR, BMP_BGRA, BMP_ABGR, BMP_ARGB};
|
| ++#include "./turbojpeg.h"
|
| +
|
| +-#ifdef __cplusplus
|
| +-extern "C" {
|
| +-#endif
|
| ++int loadbmp(char *filename, unsigned char **buf, int *w, int *h, int pf,
|
| ++ int bottomup);
|
| +
|
| +-// This will load a Windows bitmap from a file and return a buffer with the
|
| +-// specified pixel format, scanline alignment, and orientation. The width and
|
| +-// height are returned in w and h.
|
| ++int savebmp(char *filename, unsigned char *buf, int w, int h, int pf,
|
| ++ int bottomup);
|
| +
|
| +-int loadbmp(char *filename, unsigned char **buf, int *w, int *h,
|
| +- enum BMPPIXELFORMAT f, int align, int dstbottomup);
|
| +-
|
| +-// This will save a buffer with the specified pixel format, pitch, orientation,
|
| +-// width, and height as a 24-bit Windows bitmap or PPM (the filename determines
|
| +-// which format to use)
|
| +-
|
| +-int savebmp(char *filename, unsigned char *buf, int w, int h,
|
| +- enum BMPPIXELFORMAT f, int srcpitch, int srcbottomup);
|
| +-
|
| + const char *bmpgeterr(void);
|
| +
|
| +-#ifdef __cplusplus
|
| +-}
|
| + #endif
|
| +-
|
| +-#endif
|
| +Index: cderror.h
|
| +===================================================================
|
| +--- cderror.h (revision 829)
|
| ++++ cderror.h (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * cderror.h
|
| + *
|
| + * Copyright (C) 1994-1997, Thomas G. Lane.
|
| ++ * Modified 2009 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -45,6 +46,7 @@
|
| + JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
|
| + JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
|
| + JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
|
| ++JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
|
| + JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
|
| + JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image")
|
| + JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")
|
| +Index: cdjpeg.h
|
| +===================================================================
|
| +--- cdjpeg.h (revision 829)
|
| ++++ cdjpeg.h (working copy)
|
| +@@ -104,6 +104,7 @@
|
| + #define jinit_write_targa jIWrTarga
|
| + #define read_quant_tables RdQTables
|
| + #define read_scan_script RdScnScript
|
| ++#define set_quality_ratings SetQRates
|
| + #define set_quant_slots SetQSlots
|
| + #define set_sample_factors SetSFacts
|
| + #define read_color_map RdCMap
|
| +@@ -131,8 +132,10 @@
|
| + /* cjpeg support routines (in rdswitch.c) */
|
| +
|
| + EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
|
| +- int scale_factor, boolean force_baseline));
|
| ++ boolean force_baseline));
|
| + EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
|
| ++EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
|
| ++ boolean force_baseline));
|
| + EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
|
| + EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
|
| +
|
| +Index: cjpeg.c
|
| +===================================================================
|
| +--- cjpeg.c (revision 829)
|
| ++++ cjpeg.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * cjpeg.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1998, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2003-2011 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, 2013, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains a command-line user interface for the JPEG compressor.
|
| +@@ -25,6 +28,7 @@
|
| +
|
| + #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */
|
| + #include "jversion.h" /* for version message */
|
| ++#include "config.h"
|
| +
|
| + #ifdef USE_CCOMMAND /* command-line reader for Macintosh */
|
| + #ifdef __MWERKS__
|
| +@@ -135,6 +139,7 @@
|
| +
|
| + static const char * progname; /* program name for error messages */
|
| + static char * outfilename; /* for -outfile switch */
|
| ++boolean memdst; /* for -memdst switch */
|
| +
|
| +
|
| + LOCAL(void)
|
| +@@ -149,8 +154,9 @@
|
| + #endif
|
| +
|
| + fprintf(stderr, "Switches (names may be abbreviated):\n");
|
| +- fprintf(stderr, " -quality N Compression quality (0..100; 5-95 is useful range)\n");
|
| ++ fprintf(stderr, " -quality N[,...] Compression quality (0..100; 5-95 is useful range)\n");
|
| + fprintf(stderr, " -grayscale Create monochrome JPEG file\n");
|
| ++ fprintf(stderr, " -rgb Create RGB JPEG file\n");
|
| + #ifdef ENTROPY_OPT_SUPPORTED
|
| + fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n");
|
| + #endif
|
| +@@ -161,6 +167,9 @@
|
| + fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n");
|
| + #endif
|
| + fprintf(stderr, "Switches for advanced users:\n");
|
| ++#ifdef C_ARITH_CODING_SUPPORTED
|
| ++ fprintf(stderr, " -arithmetic Use arithmetic coding\n");
|
| ++#endif
|
| + #ifdef DCT_ISLOW_SUPPORTED
|
| + fprintf(stderr, " -dct int Use integer DCT method%s\n",
|
| + (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
|
| +@@ -179,11 +188,11 @@
|
| + #endif
|
| + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n");
|
| + fprintf(stderr, " -outfile name Specify name for output file\n");
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++ fprintf(stderr, " -memdst Compress to memory instead of file (useful for benchmarking)\n");
|
| ++#endif
|
| + fprintf(stderr, " -verbose or -debug Emit debug output\n");
|
| + fprintf(stderr, "Switches for wizards:\n");
|
| +-#ifdef C_ARITH_CODING_SUPPORTED
|
| +- fprintf(stderr, " -arithmetic Use arithmetic coding\n");
|
| +-#endif
|
| + fprintf(stderr, " -baseline Force baseline quantization tables\n");
|
| + fprintf(stderr, " -qtables file Use quantization tables given in file\n");
|
| + fprintf(stderr, " -qslots N[,...] Set component quantization tables\n");
|
| +@@ -209,10 +218,9 @@
|
| + {
|
| + int argn;
|
| + char * arg;
|
| +- int quality; /* -quality parameter */
|
| +- int q_scale_factor; /* scaling percentage for -qtables */
|
| + boolean force_baseline;
|
| + boolean simple_progressive;
|
| ++ char * qualityarg = NULL; /* saves -quality parm if any */
|
| + char * qtablefile = NULL; /* saves -qtables filename if any */
|
| + char * qslotsarg = NULL; /* saves -qslots parm if any */
|
| + char * samplearg = NULL; /* saves -sample parm if any */
|
| +@@ -219,15 +227,12 @@
|
| + char * scansarg = NULL; /* saves -scans parm if any */
|
| +
|
| + /* Set up default JPEG parameters. */
|
| +- /* Note that default -quality level need not, and does not,
|
| +- * match the default scaling for an explicit -qtables argument.
|
| +- */
|
| +- quality = 75; /* default -quality value */
|
| +- q_scale_factor = 100; /* default to no scaling for -qtables */
|
| ++
|
| + force_baseline = FALSE; /* by default, allow 16-bit quantizers */
|
| + simple_progressive = FALSE;
|
| + is_targa = FALSE;
|
| + outfilename = NULL;
|
| ++ memdst = FALSE;
|
| + cinfo->err->trace_level = 0;
|
| +
|
| + /* Scan command line options, adjust parameters */
|
| +@@ -277,8 +282,11 @@
|
| + static boolean printed_version = FALSE;
|
| +
|
| + if (! printed_version) {
|
| +- fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n",
|
| +- JVERSION, JCOPYRIGHT);
|
| ++ fprintf(stderr, "%s version %s (build %s)\n",
|
| ++ PACKAGE_NAME, VERSION, BUILD);
|
| ++ fprintf(stderr, "%s\n\n", JCOPYRIGHT);
|
| ++ fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
|
| ++ JVERSION);
|
| + printed_version = TRUE;
|
| + }
|
| + cinfo->err->trace_level++;
|
| +@@ -287,6 +295,10 @@
|
| + /* Force a monochrome JPEG file to be generated. */
|
| + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
|
| +
|
| ++ } else if (keymatch(arg, "rgb", 3)) {
|
| ++ /* Force an RGB JPEG file to be generated. */
|
| ++ jpeg_set_colorspace(cinfo, JCS_RGB);
|
| ++
|
| + } else if (keymatch(arg, "maxmemory", 3)) {
|
| + /* Maximum memory in Kb (or Mb with 'm'). */
|
| + long lval;
|
| +@@ -305,7 +317,7 @@
|
| + #ifdef ENTROPY_OPT_SUPPORTED
|
| + cinfo->optimize_coding = TRUE;
|
| + #else
|
| +- fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
|
| ++ fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n",
|
| + progname);
|
| + exit(EXIT_FAILURE);
|
| + #endif
|
| +@@ -322,19 +334,26 @@
|
| + simple_progressive = TRUE;
|
| + /* We must postpone execution until num_components is known. */
|
| + #else
|
| +- fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
|
| ++ fprintf(stderr, "%s: sorry, progressive output was not compiled in\n",
|
| + progname);
|
| + exit(EXIT_FAILURE);
|
| + #endif
|
| +
|
| ++ } else if (keymatch(arg, "memdst", 2)) {
|
| ++ /* Use in-memory destination manager */
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++ memdst = TRUE;
|
| ++#else
|
| ++ fprintf(stderr, "%s: sorry, in-memory destination manager was not compiled in\n",
|
| ++ progname);
|
| ++ exit(EXIT_FAILURE);
|
| ++#endif
|
| ++
|
| + } else if (keymatch(arg, "quality", 1)) {
|
| +- /* Quality factor (quantization table scaling factor). */
|
| ++ /* Quality ratings (quantization table scaling factors). */
|
| + if (++argn >= argc) /* advance to next argument */
|
| + usage();
|
| +- if (sscanf(argv[argn], "%d", &quality) != 1)
|
| +- usage();
|
| +- /* Change scale factor in case -qtables is present. */
|
| +- q_scale_factor = jpeg_quality_scaling(quality);
|
| ++ qualityarg = argv[argn];
|
| +
|
| + } else if (keymatch(arg, "qslots", 2)) {
|
| + /* Quantization table slot numbers. */
|
| +@@ -382,7 +401,7 @@
|
| + * default sampling factors.
|
| + */
|
| +
|
| +- } else if (keymatch(arg, "scans", 2)) {
|
| ++ } else if (keymatch(arg, "scans", 4)) {
|
| + /* Set scan script. */
|
| + #ifdef C_MULTISCAN_FILES_SUPPORTED
|
| + if (++argn >= argc) /* advance to next argument */
|
| +@@ -390,7 +409,7 @@
|
| + scansarg = argv[argn];
|
| + /* We must postpone reading the file in case -progressive appears. */
|
| + #else
|
| +- fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
|
| ++ fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n",
|
| + progname);
|
| + exit(EXIT_FAILURE);
|
| + #endif
|
| +@@ -422,11 +441,12 @@
|
| +
|
| + /* Set quantization tables for selected quality. */
|
| + /* Some or all may be overridden if -qtables is present. */
|
| +- jpeg_set_quality(cinfo, quality, force_baseline);
|
| ++ if (qualityarg != NULL) /* process -quality if it was present */
|
| ++ if (! set_quality_ratings(cinfo, qualityarg, force_baseline))
|
| ++ usage();
|
| +
|
| + if (qtablefile != NULL) /* process -qtables if it was present */
|
| +- if (! read_quant_tables(cinfo, qtablefile,
|
| +- q_scale_factor, force_baseline))
|
| ++ if (! read_quant_tables(cinfo, qtablefile, force_baseline))
|
| + usage();
|
| +
|
| + if (qslotsarg != NULL) /* process -qslots if it was present */
|
| +@@ -468,7 +488,9 @@
|
| + int file_index;
|
| + cjpeg_source_ptr src_mgr;
|
| + FILE * input_file;
|
| +- FILE * output_file;
|
| ++ FILE * output_file = NULL;
|
| ++ unsigned char *outbuffer = NULL;
|
| ++ unsigned long outsize = 0;
|
| + JDIMENSION num_scanlines;
|
| +
|
| + /* On Mac, fetch a command line. */
|
| +@@ -511,20 +533,22 @@
|
| + file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
|
| +
|
| + #ifdef TWO_FILE_COMMANDLINE
|
| +- /* Must have either -outfile switch or explicit output file name */
|
| +- if (outfilename == NULL) {
|
| +- if (file_index != argc-2) {
|
| +- fprintf(stderr, "%s: must name one input and one output file\n",
|
| +- progname);
|
| +- usage();
|
| ++ if (!memdst) {
|
| ++ /* Must have either -outfile switch or explicit output file name */
|
| ++ if (outfilename == NULL) {
|
| ++ if (file_index != argc-2) {
|
| ++ fprintf(stderr, "%s: must name one input and one output file\n",
|
| ++ progname);
|
| ++ usage();
|
| ++ }
|
| ++ outfilename = argv[file_index+1];
|
| ++ } else {
|
| ++ if (file_index != argc-1) {
|
| ++ fprintf(stderr, "%s: must name one input and one output file\n",
|
| ++ progname);
|
| ++ usage();
|
| ++ }
|
| + }
|
| +- outfilename = argv[file_index+1];
|
| +- } else {
|
| +- if (file_index != argc-1) {
|
| +- fprintf(stderr, "%s: must name one input and one output file\n",
|
| +- progname);
|
| +- usage();
|
| +- }
|
| + }
|
| + #else
|
| + /* Unix style: expect zero or one file name */
|
| +@@ -551,7 +575,7 @@
|
| + fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
|
| + exit(EXIT_FAILURE);
|
| + }
|
| +- } else {
|
| ++ } else if (!memdst) {
|
| + /* default output file is stdout */
|
| + output_file = write_stdout();
|
| }
|
| +@@ -574,7 +598,12 @@
|
| + file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
|
| +
|
| + /* Specify data destination for compression */
|
| +- jpeg_stdio_dest(&cinfo, output_file);
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++ if (memdst)
|
| ++ jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
|
| ++ else
|
| ++#endif
|
| ++ jpeg_stdio_dest(&cinfo, output_file);
|
| +
|
| + /* Start compressor */
|
| + jpeg_start_compress(&cinfo, TRUE);
|
| +@@ -593,7 +622,7 @@
|
| + /* Close files, if we opened them */
|
| + if (input_file != stdin)
|
| + fclose(input_file);
|
| +- if (output_file != stdout)
|
| ++ if (output_file != stdout && output_file != NULL)
|
| + fclose(output_file);
|
| +
|
| + #ifdef PROGRESS_REPORT
|
| +@@ -600,6 +629,12 @@
|
| + end_progress_monitor((j_common_ptr) &cinfo);
|
| + #endif
|
|
|
| - if (cinfo->marker->discarded_bytes != 0) {
|
| -- WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
|
| -+ TRACEMS2(cinfo, 1, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
|
| - cinfo->marker->discarded_bytes = 0;
|
| ++ if (memdst) {
|
| ++ fprintf(stderr, "Compressed size: %lu bytes\n", outsize);
|
| ++ if (outbuffer != NULL)
|
| ++ free(outbuffer);
|
| ++ }
|
| ++
|
| + /* All done. */
|
| + exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
|
| + return 0; /* suppress no-return-value warnings */
|
| +Index: djpeg.c
|
| +===================================================================
|
| +--- djpeg.c (revision 829)
|
| ++++ djpeg.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * djpeg.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010-2011, 2013-2015, D. R. Commander.
|
| ++ * Copyright (C) 2015, Google, Inc.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains a command-line user interface for the JPEG decompressor.
|
| +@@ -25,6 +28,7 @@
|
| +
|
| + #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */
|
| + #include "jversion.h" /* for version message */
|
| ++#include "config.h"
|
| +
|
| + #include <ctype.h> /* to declare isprint() */
|
| +
|
| +@@ -84,6 +88,10 @@
|
| +
|
| + static const char * progname; /* program name for error messages */
|
| + static char * outfilename; /* for -outfile switch */
|
| ++boolean memsrc; /* for -memsrc switch */
|
| ++boolean strip, skip;
|
| ++JDIMENSION startY, endY;
|
| ++#define INPUT_BUF_SIZE 4096
|
| +
|
| +
|
| + LOCAL(void)
|
| +@@ -101,6 +109,7 @@
|
| + fprintf(stderr, " -colors N Reduce image to no more than N colors\n");
|
| + fprintf(stderr, " -fast Fast, low-quality processing\n");
|
| + fprintf(stderr, " -grayscale Force grayscale output\n");
|
| ++ fprintf(stderr, " -rgb Force RGB output\n");
|
| + #ifdef IDCT_SCALING_SUPPORTED
|
| + fprintf(stderr, " -scale M/N Scale output image by fraction M/N, eg, 1/8\n");
|
| + #endif
|
| +@@ -153,6 +162,12 @@
|
| + #endif
|
| + fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n");
|
| + fprintf(stderr, " -outfile name Specify name for output file\n");
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++ fprintf(stderr, " -memsrc Load input file into memory before decompressing\n");
|
| ++#endif
|
| ++
|
| ++ fprintf(stderr, " -skip Y0,Y1 Decode all rows except those between Y0 and Y1 (inclusive)\n");
|
| ++ fprintf(stderr, " -strip Y0,Y1 Decode only rows between Y0 and Y1 (inclusive)\n");
|
| + fprintf(stderr, " -verbose or -debug Emit debug output\n");
|
| + exit(EXIT_FAILURE);
|
| + }
|
| +@@ -176,6 +191,9 @@
|
| + /* Set up default JPEG parameters. */
|
| + requested_fmt = DEFAULT_FMT; /* set default output file format */
|
| + outfilename = NULL;
|
| ++ memsrc = FALSE;
|
| ++ strip = FALSE;
|
| ++ skip = FALSE;
|
| + cinfo->err->trace_level = 0;
|
| +
|
| + /* Scan command line options, adjust parameters */
|
| +@@ -240,8 +258,11 @@
|
| + static boolean printed_version = FALSE;
|
| +
|
| + if (! printed_version) {
|
| +- fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n",
|
| +- JVERSION, JCOPYRIGHT);
|
| ++ fprintf(stderr, "%s version %s (build %s)\n",
|
| ++ PACKAGE_NAME, VERSION, BUILD);
|
| ++ fprintf(stderr, "%s\n\n", JCOPYRIGHT);
|
| ++ fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
|
| ++ JVERSION);
|
| + printed_version = TRUE;
|
| + }
|
| + cinfo->err->trace_level++;
|
| +@@ -263,6 +284,10 @@
|
| + /* Force monochrome output. */
|
| + cinfo->out_color_space = JCS_GRAYSCALE;
|
| +
|
| ++ } else if (keymatch(arg, "rgb", 2)) {
|
| ++ /* Force RGB output. */
|
| ++ cinfo->out_color_space = JCS_RGB;
|
| ++
|
| + } else if (keymatch(arg, "map", 3)) {
|
| + /* Quantize to a color map taken from an input file. */
|
| + if (++argn >= argc) /* advance to next argument */
|
| +@@ -314,6 +339,16 @@
|
| + usage();
|
| + outfilename = argv[argn]; /* save it away for later use */
|
| +
|
| ++ } else if (keymatch(arg, "memsrc", 2)) {
|
| ++ /* Use in-memory source manager */
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++ memsrc = TRUE;
|
| ++#else
|
| ++ fprintf(stderr, "%s: sorry, in-memory source manager was not compiled in\n",
|
| ++ progname);
|
| ++ exit(EXIT_FAILURE);
|
| ++#endif
|
| ++
|
| + } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) {
|
| + /* PPM/PGM output format. */
|
| + requested_fmt = FMT_PPM;
|
| +@@ -322,7 +357,7 @@
|
| + /* RLE output format. */
|
| + requested_fmt = FMT_RLE;
|
| +
|
| +- } else if (keymatch(arg, "scale", 1)) {
|
| ++ } else if (keymatch(arg, "scale", 2)) {
|
| + /* Scale the output image by a fraction M/N. */
|
| + if (++argn >= argc) /* advance to next argument */
|
| + usage();
|
| +@@ -330,6 +365,20 @@
|
| + &cinfo->scale_num, &cinfo->scale_denom) != 2)
|
| + usage();
|
| +
|
| ++ } else if (keymatch(arg, "strip", 2)) {
|
| ++ if (++argn >= argc)
|
| ++ usage();
|
| ++ if (sscanf(argv[argn], "%d,%d", &startY, &endY) != 2 || startY > endY)
|
| ++ usage();
|
| ++ strip = TRUE;
|
| ++
|
| ++ } else if (keymatch(arg, "skip", 2)) {
|
| ++ if (++argn >= argc)
|
| ++ usage();
|
| ++ if (sscanf(argv[argn], "%d,%d", &startY, &endY) != 2 || startY > endY)
|
| ++ usage();
|
| ++ skip = TRUE;
|
| ++
|
| + } else if (keymatch(arg, "targa", 1)) {
|
| + /* Targa output format. */
|
| + requested_fmt = FMT_TARGA;
|
| +@@ -432,6 +481,8 @@
|
| + djpeg_dest_ptr dest_mgr = NULL;
|
| + FILE * input_file;
|
| + FILE * output_file;
|
| ++ unsigned char *inbuffer = NULL;
|
| ++ unsigned long insize = 0;
|
| + JDIMENSION num_scanlines;
|
| +
|
| + /* On Mac, fetch a command line. */
|
| +@@ -455,7 +506,7 @@
|
| + * APP12 is used by some digital camera makers for textual info,
|
| + * so we provide the ability to display it as text.
|
| + * If you like, additional APPn marker types can be selected for display,
|
| +- * but don't try to override APP0 or APP14 this way (see libjpeg.doc).
|
| ++ * but don't try to override APP0 or APP14 this way (see libjpeg.txt).
|
| + */
|
| + jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker);
|
| + jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker);
|
| +@@ -526,7 +577,30 @@
|
| + #endif
|
| +
|
| + /* Specify data source for decompression */
|
| +- jpeg_stdio_src(&cinfo, input_file);
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++ if (memsrc) {
|
| ++ size_t nbytes;
|
| ++ do {
|
| ++ inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE);
|
| ++ if (inbuffer == NULL) {
|
| ++ fprintf(stderr, "%s: memory allocation failure\n", progname);
|
| ++ exit(EXIT_FAILURE);
|
| ++ }
|
| ++ nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE);
|
| ++ if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) {
|
| ++ if (file_index < argc)
|
| ++ fprintf(stderr, "%s: can't read from %s\n", progname,
|
| ++ argv[file_index]);
|
| ++ else
|
| ++ fprintf(stderr, "%s: can't read from stdin\n", progname);
|
| ++ }
|
| ++ insize += (unsigned long)nbytes;
|
| ++ } while (nbytes == INPUT_BUF_SIZE);
|
| ++ fprintf(stderr, "Compressed size: %lu bytes\n", insize);
|
| ++ jpeg_mem_src(&cinfo, inbuffer, insize);
|
| ++ } else
|
| ++#endif
|
| ++ jpeg_stdio_src(&cinfo, input_file);
|
| +
|
| + /* Read file header, set default decompression parameters */
|
| + (void) jpeg_read_header(&cinfo, TRUE);
|
| +@@ -575,14 +649,64 @@
|
| + /* Start decompressor */
|
| + (void) jpeg_start_decompress(&cinfo);
|
| +
|
| +- /* Write output file header */
|
| +- (*dest_mgr->start_output) (&cinfo, dest_mgr);
|
| ++ /* Strip decode */
|
| ++ if (strip || skip) {
|
| ++ JDIMENSION tmp;
|
| +
|
| +- /* Process data */
|
| +- while (cinfo.output_scanline < cinfo.output_height) {
|
| +- num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
|
| +- dest_mgr->buffer_height);
|
| +- (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
|
| ++ /* Check for valid endY. We cannot check this value until after
|
| ++ * jpeg_start_decompress() is called. Note that we have already verified
|
| ++ * that startY <= endY.
|
| ++ */
|
| ++ if (endY > cinfo.output_height - 1) {
|
| ++ fprintf(stderr, "%s: strip %d-%d exceeds image height %d\n", progname,
|
| ++ startY, endY, cinfo.output_height);
|
| ++ exit(EXIT_FAILURE);
|
| ++ }
|
| ++
|
| ++ /* Write output file header. This is a hack to ensure that the destination
|
| ++ * manager creates an image of the proper size for the partial decode.
|
| ++ */
|
| ++ tmp = cinfo.output_height;
|
| ++ cinfo.output_height = endY - startY + 1;
|
| ++ if (skip)
|
| ++ cinfo.output_height = tmp - cinfo.output_height;
|
| ++ (*dest_mgr->start_output) (&cinfo, dest_mgr);
|
| ++ cinfo.output_height = tmp;
|
| ++
|
| ++ /* Process data */
|
| ++ if (skip) {
|
| ++ while (cinfo.output_scanline < startY) {
|
| ++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
|
| ++ dest_mgr->buffer_height);
|
| ++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
|
| ++ }
|
| ++ jpeg_skip_scanlines(&cinfo, endY - startY + 1);
|
| ++ while (cinfo.output_scanline < cinfo.output_height) {
|
| ++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
|
| ++ dest_mgr->buffer_height);
|
| ++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
|
| ++ }
|
| ++ } else {
|
| ++ jpeg_skip_scanlines(&cinfo, startY);
|
| ++ while (cinfo.output_scanline <= endY) {
|
| ++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
|
| ++ dest_mgr->buffer_height);
|
| ++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
|
| ++ }
|
| ++ jpeg_skip_scanlines(&cinfo, cinfo.output_height - endY + 1);
|
| ++ }
|
| ++
|
| ++ /* Normal full image decode */
|
| ++ } else {
|
| ++ /* Write output file header */
|
| ++ (*dest_mgr->start_output) (&cinfo, dest_mgr);
|
| ++
|
| ++ /* Process data */
|
| ++ while (cinfo.output_scanline < cinfo.output_height) {
|
| ++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
|
| ++ dest_mgr->buffer_height);
|
| ++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
|
| ++ }
|
| }
|
|
|
| -@@ -944,7 +944,144 @@
|
| - return TRUE;
|
| + #ifdef PROGRESS_REPORT
|
| +@@ -610,6 +734,9 @@
|
| + end_progress_monitor((j_common_ptr) &cinfo);
|
| + #endif
|
| +
|
| ++ if (memsrc && inbuffer != NULL)
|
| ++ free(inbuffer);
|
| ++
|
| + /* All done. */
|
| + exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
|
| + return 0; /* suppress no-return-value warnings */
|
| +Index: jcapimin.c
|
| +===================================================================
|
| +--- jcapimin.c (revision 829)
|
| ++++ jcapimin.c (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * jcapimin.c
|
| + *
|
| + * Copyright (C) 1994-1998, Thomas G. Lane.
|
| ++ * Modified 2003-2010 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -63,8 +64,12 @@
|
| +
|
| + cinfo->comp_info = NULL;
|
| +
|
| +- for (i = 0; i < NUM_QUANT_TBLS; i++)
|
| ++ for (i = 0; i < NUM_QUANT_TBLS; i++) {
|
| + cinfo->quant_tbl_ptrs[i] = NULL;
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ cinfo->q_scale_factor[i] = 100;
|
| ++#endif
|
| ++ }
|
| +
|
| + for (i = 0; i < NUM_HUFF_TBLS; i++) {
|
| + cinfo->dc_huff_tbl_ptrs[i] = NULL;
|
| +@@ -71,6 +76,13 @@
|
| + cinfo->ac_huff_tbl_ptrs[i] = NULL;
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++ /* Must do it here for emit_dqt in case jpeg_write_tables is used */
|
| ++ cinfo->block_size = DCTSIZE;
|
| ++ cinfo->natural_order = jpeg_natural_order;
|
| ++ cinfo->lim_Se = DCTSIZE2-1;
|
| ++#endif
|
| ++
|
| + cinfo->script_space = NULL;
|
| +
|
| + cinfo->input_gamma = 1.0; /* in case application forgets */
|
| +Index: jccolor.c
|
| +===================================================================
|
| +--- jccolor.c (revision 829)
|
| ++++ jccolor.c (working copy)
|
| +@@ -1,10 +1,11 @@
|
| + /*
|
| + * jccolor.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1996, Thomas G. Lane.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * Copyright 2009 D. R. Commander
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Copyright (C) 2009-2012, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains input colorspace conversion routines.
|
| +@@ -14,6 +15,7 @@
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| + #include "jsimd.h"
|
| ++#include "config.h"
|
| +
|
| +
|
| + /* Private subobject */
|
| +@@ -81,6 +83,111 @@
|
| + #define TABLE_SIZE (8*(MAXJSAMPLE+1))
|
| +
|
| +
|
| ++/* Include inline routines for colorspace extensions */
|
| ++
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++
|
| ++#define RGB_RED EXT_RGB_RED
|
| ++#define RGB_GREEN EXT_RGB_GREEN
|
| ++#define RGB_BLUE EXT_RGB_BLUE
|
| ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| ++#define rgb_ycc_convert_internal extrgb_ycc_convert_internal
|
| ++#define rgb_gray_convert_internal extrgb_gray_convert_internal
|
| ++#define rgb_rgb_convert_internal extrgb_rgb_convert_internal
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef rgb_ycc_convert_internal
|
| ++#undef rgb_gray_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_RGBX_RED
|
| ++#define RGB_GREEN EXT_RGBX_GREEN
|
| ++#define RGB_BLUE EXT_RGBX_BLUE
|
| ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| ++#define rgb_ycc_convert_internal extrgbx_ycc_convert_internal
|
| ++#define rgb_gray_convert_internal extrgbx_gray_convert_internal
|
| ++#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef rgb_ycc_convert_internal
|
| ++#undef rgb_gray_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_BGR_RED
|
| ++#define RGB_GREEN EXT_BGR_GREEN
|
| ++#define RGB_BLUE EXT_BGR_BLUE
|
| ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| ++#define rgb_ycc_convert_internal extbgr_ycc_convert_internal
|
| ++#define rgb_gray_convert_internal extbgr_gray_convert_internal
|
| ++#define rgb_rgb_convert_internal extbgr_rgb_convert_internal
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef rgb_ycc_convert_internal
|
| ++#undef rgb_gray_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_BGRX_RED
|
| ++#define RGB_GREEN EXT_BGRX_GREEN
|
| ++#define RGB_BLUE EXT_BGRX_BLUE
|
| ++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| ++#define rgb_ycc_convert_internal extbgrx_ycc_convert_internal
|
| ++#define rgb_gray_convert_internal extbgrx_gray_convert_internal
|
| ++#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef rgb_ycc_convert_internal
|
| ++#undef rgb_gray_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_XBGR_RED
|
| ++#define RGB_GREEN EXT_XBGR_GREEN
|
| ++#define RGB_BLUE EXT_XBGR_BLUE
|
| ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| ++#define rgb_ycc_convert_internal extxbgr_ycc_convert_internal
|
| ++#define rgb_gray_convert_internal extxbgr_gray_convert_internal
|
| ++#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef rgb_ycc_convert_internal
|
| ++#undef rgb_gray_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_XRGB_RED
|
| ++#define RGB_GREEN EXT_XRGB_GREEN
|
| ++#define RGB_BLUE EXT_XRGB_BLUE
|
| ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| ++#define rgb_ycc_convert_internal extxrgb_ycc_convert_internal
|
| ++#define rgb_gray_convert_internal extxrgb_gray_convert_internal
|
| ++#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal
|
| ++#include "jccolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef rgb_ycc_convert_internal
|
| ++#undef rgb_gray_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++
|
| + /*
|
| + * Initialize for RGB->YCC colorspace conversion.
|
| + */
|
| +@@ -119,14 +226,6 @@
|
| +
|
| + /*
|
| + * Convert some rows of samples to the JPEG colorspace.
|
| +- *
|
| +- * Note that we change from the application's interleaved-pixel format
|
| +- * to our internal noninterleaved, one-plane-per-component format.
|
| +- * The input buffer is therefore three times as wide as the output buffer.
|
| +- *
|
| +- * A starting row offset is provided only for the output buffer. The caller
|
| +- * can easily adjust the passed input_buf value to accommodate any row
|
| +- * offset required on that side.
|
| + */
|
| +
|
| + METHODDEF(void)
|
| +@@ -134,43 +233,39 @@
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| + JDIMENSION output_row, int num_rows)
|
| + {
|
| +- my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
|
| +- register int r, g, b;
|
| +- register INT32 * ctab = cconvert->rgb_ycc_tab;
|
| +- register JSAMPROW inptr;
|
| +- register JSAMPROW outptr0, outptr1, outptr2;
|
| +- register JDIMENSION col;
|
| +- JDIMENSION num_cols = cinfo->image_width;
|
| +-
|
| +- while (--num_rows >= 0) {
|
| +- inptr = *input_buf++;
|
| +- outptr0 = output_buf[0][output_row];
|
| +- outptr1 = output_buf[1][output_row];
|
| +- outptr2 = output_buf[2][output_row];
|
| +- output_row++;
|
| +- for (col = 0; col < num_cols; col++) {
|
| +- r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
|
| +- g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
|
| +- b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
|
| +- inptr += rgb_pixelsize[cinfo->in_color_space];
|
| +- /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
|
| +- * must be too; we do not need an explicit range-limiting operation.
|
| +- * Hence the value being shifted is never negative, and we don't
|
| +- * need the general RIGHT_SHIFT macro.
|
| +- */
|
| +- /* Y */
|
| +- outptr0[col] = (JSAMPLE)
|
| +- ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
|
| +- >> SCALEBITS);
|
| +- /* Cb */
|
| +- outptr1[col] = (JSAMPLE)
|
| +- ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
|
| +- >> SCALEBITS);
|
| +- /* Cr */
|
| +- outptr2[col] = (JSAMPLE)
|
| +- ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
|
| +- >> SCALEBITS);
|
| +- }
|
| ++ switch (cinfo->in_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ extrgbx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ default:
|
| ++ rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| + }
|
| }
|
|
|
| -+#ifdef MOTION_JPEG_SUPPORTED
|
| +@@ -180,9 +275,6 @@
|
|
|
| -+/* The default Huffman tables used by motion JPEG frames. When a motion JPEG
|
| -+ * frame does not have DHT tables, we should use the huffman tables suggested by
|
| -+ * the JPEG standard. Each of these tables represents a member of the JHUFF_TBLS
|
| -+ * struct so we can just copy it to the according JHUFF_TBLS member.
|
| + /*
|
| + * Convert some rows of samples to the JPEG colorspace.
|
| +- * This version handles RGB->grayscale conversion, which is the same
|
| +- * as the RGB->Y portion of RGB->YCbCr.
|
| +- * We assume rgb_ycc_start has been called (we only use the Y tables).
|
| + */
|
| +
|
| + METHODDEF(void)
|
| +@@ -190,28 +282,85 @@
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| + JDIMENSION output_row, int num_rows)
|
| + {
|
| +- my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
|
| +- register int r, g, b;
|
| +- register INT32 * ctab = cconvert->rgb_ycc_tab;
|
| +- register JSAMPROW inptr;
|
| +- register JSAMPROW outptr;
|
| +- register JDIMENSION col;
|
| +- JDIMENSION num_cols = cinfo->image_width;
|
| ++ switch (cinfo->in_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ default:
|
| ++ rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ }
|
| ++}
|
| +
|
| +- while (--num_rows >= 0) {
|
| +- inptr = *input_buf++;
|
| +- outptr = output_buf[0][output_row];
|
| +- output_row++;
|
| +- for (col = 0; col < num_cols; col++) {
|
| +- r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
|
| +- g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
|
| +- b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
|
| +- inptr += rgb_pixelsize[cinfo->in_color_space];
|
| +- /* Y */
|
| +- outptr[col] = (JSAMPLE)
|
| +- ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
|
| +- >> SCALEBITS);
|
| +- }
|
| ++
|
| ++/*
|
| ++ * Extended RGB to plain RGB conversion
|
| + */
|
| -+/* DC table 0 */
|
| -+LOCAL(const unsigned char) mjpg_dc0_bits[] = {
|
| -+ 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01,
|
| -+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
| ++
|
| ++METHODDEF(void)
|
| ++rgb_rgb_convert (j_compress_ptr cinfo,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows)
|
| ++{
|
| ++ switch (cinfo->in_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| ++ default:
|
| ++ rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
| ++ num_rows);
|
| ++ break;
|
| + }
|
| + }
|
| +
|
| +@@ -377,6 +526,10 @@
|
| + case JCS_EXT_BGRX:
|
| + case JCS_EXT_XBGR:
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_RGBA:
|
| ++ case JCS_EXT_BGRA:
|
| ++ case JCS_EXT_ABGR:
|
| ++ case JCS_EXT_ARGB:
|
| + if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space])
|
| + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
|
| + break;
|
| +@@ -411,9 +564,17 @@
|
| + cinfo->in_color_space == JCS_EXT_BGR ||
|
| + cinfo->in_color_space == JCS_EXT_BGRX ||
|
| + cinfo->in_color_space == JCS_EXT_XBGR ||
|
| +- cinfo->in_color_space == JCS_EXT_XRGB) {
|
| +- cconvert->pub.start_pass = rgb_ycc_start;
|
| +- cconvert->pub.color_convert = rgb_gray_convert;
|
| ++ cinfo->in_color_space == JCS_EXT_XRGB ||
|
| ++ cinfo->in_color_space == JCS_EXT_RGBA ||
|
| ++ cinfo->in_color_space == JCS_EXT_BGRA ||
|
| ++ cinfo->in_color_space == JCS_EXT_ABGR ||
|
| ++ cinfo->in_color_space == JCS_EXT_ARGB) {
|
| ++ if (jsimd_can_rgb_gray())
|
| ++ cconvert->pub.color_convert = jsimd_rgb_gray_convert;
|
| ++ else {
|
| ++ cconvert->pub.start_pass = rgb_ycc_start;
|
| ++ cconvert->pub.color_convert = rgb_gray_convert;
|
| ++ }
|
| + } else if (cinfo->in_color_space == JCS_YCbCr)
|
| + cconvert->pub.color_convert = grayscale_convert;
|
| + else
|
| +@@ -421,17 +582,25 @@
|
| + break;
|
| +
|
| + case JCS_RGB:
|
| +- case JCS_EXT_RGB:
|
| +- case JCS_EXT_RGBX:
|
| +- case JCS_EXT_BGR:
|
| +- case JCS_EXT_BGRX:
|
| +- case JCS_EXT_XBGR:
|
| +- case JCS_EXT_XRGB:
|
| + if (cinfo->num_components != 3)
|
| + ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
| +- if (cinfo->in_color_space == cinfo->jpeg_color_space &&
|
| +- rgb_pixelsize[cinfo->in_color_space] == 3)
|
| ++ if (rgb_red[cinfo->in_color_space] == 0 &&
|
| ++ rgb_green[cinfo->in_color_space] == 1 &&
|
| ++ rgb_blue[cinfo->in_color_space] == 2 &&
|
| ++ rgb_pixelsize[cinfo->in_color_space] == 3)
|
| + cconvert->pub.color_convert = null_convert;
|
| ++ else if (cinfo->in_color_space == JCS_RGB ||
|
| ++ cinfo->in_color_space == JCS_EXT_RGB ||
|
| ++ cinfo->in_color_space == JCS_EXT_RGBX ||
|
| ++ cinfo->in_color_space == JCS_EXT_BGR ||
|
| ++ cinfo->in_color_space == JCS_EXT_BGRX ||
|
| ++ cinfo->in_color_space == JCS_EXT_XBGR ||
|
| ++ cinfo->in_color_space == JCS_EXT_XRGB ||
|
| ++ cinfo->in_color_space == JCS_EXT_RGBA ||
|
| ++ cinfo->in_color_space == JCS_EXT_BGRA ||
|
| ++ cinfo->in_color_space == JCS_EXT_ABGR ||
|
| ++ cinfo->in_color_space == JCS_EXT_ARGB)
|
| ++ cconvert->pub.color_convert = rgb_rgb_convert;
|
| + else
|
| + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
| + break;
|
| +@@ -445,7 +614,11 @@
|
| + cinfo->in_color_space == JCS_EXT_BGR ||
|
| + cinfo->in_color_space == JCS_EXT_BGRX ||
|
| + cinfo->in_color_space == JCS_EXT_XBGR ||
|
| +- cinfo->in_color_space == JCS_EXT_XRGB) {
|
| ++ cinfo->in_color_space == JCS_EXT_XRGB ||
|
| ++ cinfo->in_color_space == JCS_EXT_RGBA ||
|
| ++ cinfo->in_color_space == JCS_EXT_BGRA ||
|
| ++ cinfo->in_color_space == JCS_EXT_ABGR ||
|
| ++ cinfo->in_color_space == JCS_EXT_ARGB) {
|
| + if (jsimd_can_rgb_ycc())
|
| + cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
|
| + else {
|
| +Index: jcdctmgr.c
|
| +===================================================================
|
| +--- jcdctmgr.c (revision 829)
|
| ++++ jcdctmgr.c (working copy)
|
| +@@ -1,10 +1,12 @@
|
| + /*
|
| + * jcdctmgr.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Copyright (C) 2011 D. R. Commander
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains the forward-DCT management logic.
|
| +@@ -39,6 +41,8 @@
|
| + (JCOEFPTR coef_block, FAST_FLOAT * divisors,
|
| + FAST_FLOAT * workspace));
|
| +
|
| ++METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
|
| ++
|
| + typedef struct {
|
| + struct jpeg_forward_dct pub; /* public fields */
|
| +
|
| +@@ -73,7 +77,7 @@
|
| + * Find the highest bit in an integer through binary search.
|
| + */
|
| + LOCAL(int)
|
| +-fls (UINT16 val)
|
| ++flss (UINT16 val)
|
| + {
|
| + int bit;
|
| +
|
| +@@ -160,7 +164,7 @@
|
| + * of in a consecutive manner, yet again in order to allow SIMD
|
| + * routines.
|
| + */
|
| +-LOCAL(void)
|
| ++LOCAL(int)
|
| + compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
|
| + {
|
| + UDCTELEM2 fq, fr;
|
| +@@ -167,7 +171,7 @@
|
| + UDCTELEM c;
|
| + int b, r;
|
| +
|
| +- b = fls(divisor) - 1;
|
| ++ b = flss(divisor) - 1;
|
| + r = sizeof(DCTELEM) * 8 + b;
|
| +
|
| + fq = ((UDCTELEM2)1 << r) / divisor;
|
| +@@ -179,7 +183,7 @@
|
| + /* fq will be one bit too large to fit in DCTELEM, so adjust */
|
| + fq >>= 1;
|
| + r--;
|
| +- } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
|
| ++ } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
|
| + c++;
|
| + } else { /* fractional part is > 0.5 */
|
| + fq++;
|
| +@@ -189,6 +193,9 @@
|
| + dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
|
| + dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
|
| + dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
|
| ++
|
| ++ if(r <= 16) return 0;
|
| ++ else return 1;
|
| + }
|
| +
|
| + /*
|
| +@@ -232,7 +239,9 @@
|
| + }
|
| + dtbl = fdct->divisors[qtblno];
|
| + for (i = 0; i < DCTSIZE2; i++) {
|
| +- compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
|
| ++ if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
|
| ++ && fdct->quantize == jsimd_quantize)
|
| ++ fdct->quantize = quantize;
|
| + }
|
| + break;
|
| + #endif
|
| +@@ -266,10 +275,12 @@
|
| + }
|
| + dtbl = fdct->divisors[qtblno];
|
| + for (i = 0; i < DCTSIZE2; i++) {
|
| +- compute_reciprocal(
|
| ++ if(!compute_reciprocal(
|
| + DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
|
| + (INT32) aanscales[i]),
|
| +- CONST_BITS-3), &dtbl[i]);
|
| ++ CONST_BITS-3), &dtbl[i])
|
| ++ && fdct->quantize == jsimd_quantize)
|
| ++ fdct->quantize = quantize;
|
| + }
|
| + }
|
| + break;
|
| +Index: jchuff.c
|
| +===================================================================
|
| +--- jchuff.c (revision 829)
|
| ++++ jchuff.c (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jchuff.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2009-2011, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains Huffman entropy encoding routines.
|
| +@@ -14,21 +16,6 @@
|
| + * permanent JPEG objects only upon successful completion of an MCU.
|
| + */
|
| +
|
| +-/* Modifications:
|
| +- * Copyright (C)2007 Sun Microsystems, Inc.
|
| +- * Copyright (C)2009 D. R. Commander
|
| +- *
|
| +- * This library is free software and may be redistributed and/or modified under
|
| +- * the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
| +- * any later version. The full license is in the LICENSE.txt file included
|
| +- * with this distribution.
|
| +- *
|
| +- * This library is distributed in the hope that it will be useful,
|
| +- * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| +- * wxWindows Library License for more details.
|
| +- */
|
| +-
|
| + #define JPEG_INTERNALS
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| +@@ -35,13 +22,42 @@
|
| + #include "jchuff.h" /* Declarations shared with jcphuff.c */
|
| + #include <limits.h>
|
| +
|
| +-static unsigned char jpeg_first_bit_table[65536];
|
| +-int jpeg_first_bit_table_init=0;
|
| ++/*
|
| ++ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
| ++ * used for bit counting rather than the lookup table. This will reduce the
|
| ++ * memory footprint by 64k, which is important for some mobile applications
|
| ++ * that create many isolated instances of libjpeg-turbo (web browsers, for
|
| ++ * instance.) This may improve performance on some mobile platforms as well.
|
| ++ * This feature is enabled by default only on ARM processors, because some x86
|
| ++ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
| ++ * shown to have a significant performance impact even on the x86 chips that
|
| ++ * have a fast implementation of it. When building for ARMv6, you can
|
| ++ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
| ++ * flags (this defines __thumb__).
|
| ++ */
|
| +
|
| ++/* NOTE: Both GCC and Clang define __GNUC__ */
|
| ++#if defined __GNUC__ && defined __arm__
|
| ++#if !defined __thumb__ || defined __thumb2__
|
| ++#define USE_CLZ_INTRINSIC
|
| ++#endif
|
| ++#endif
|
| ++
|
| ++#ifdef USE_CLZ_INTRINSIC
|
| ++#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
| ++#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
| ++#else
|
| ++static unsigned char jpeg_nbits_table[65536];
|
| ++static int jpeg_nbits_table_init = 0;
|
| ++#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
| ++#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
| ++#endif
|
| ++
|
| + #ifndef min
|
| + #define min(a,b) ((a)<(b)?(a):(b))
|
| + #endif
|
| +
|
| ++
|
| + /* Expanded entropy encoder object for Huffman encoding.
|
| + *
|
| + * The savable_state subrecord contains fields that change within an MCU,
|
| +@@ -49,7 +65,7 @@
|
| + */
|
| +
|
| + typedef struct {
|
| +- long put_buffer; /* current bit-accumulation buffer */
|
| ++ size_t put_buffer; /* current bit-accumulation buffer */
|
| + int put_bits; /* # of bits now in it */
|
| + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
|
| + } savable_state;
|
| +@@ -181,7 +197,6 @@
|
| + }
|
| +
|
| + /* Initialize bit buffer to empty */
|
| +-
|
| + entropy->saved.put_buffer = 0;
|
| + entropy->saved.put_bits = 0;
|
| +
|
| +@@ -285,14 +300,16 @@
|
| + dtbl->ehufsi[i] = huffsize[p];
|
| + }
|
| +
|
| +- if(!jpeg_first_bit_table_init) {
|
| ++#ifndef USE_CLZ_INTRINSIC
|
| ++ if(!jpeg_nbits_table_init) {
|
| + for(i = 0; i < 65536; i++) {
|
| +- int bit = 0, val = i;
|
| +- while (val) {val >>= 1; bit++;}
|
| +- jpeg_first_bit_table[i] = bit;
|
| ++ int nbits = 0, temp = i;
|
| ++ while (temp) {temp >>= 1; nbits++;}
|
| ++ jpeg_nbits_table[i] = nbits;
|
| + }
|
| +- jpeg_first_bit_table_init = 1;
|
| ++ jpeg_nbits_table_init = 1;
|
| + }
|
| ++#endif
|
| + }
|
| +
|
| +
|
| +@@ -312,8 +329,6 @@
|
| + {
|
| + struct jpeg_destination_mgr * dest = state->cinfo->dest;
|
| +
|
| +- dest->free_in_buffer = state->free_in_buffer;
|
| +-
|
| + if (! (*dest->empty_output_buffer) (state->cinfo))
|
| + return FALSE;
|
| + /* After a successful buffer dump, must reset buffer pointers */
|
| +@@ -325,178 +340,133 @@
|
| +
|
| + /* Outputting bits to the file */
|
| +
|
| +-/* Only the right 24 bits of put_buffer are used; the valid bits are
|
| +- * left-justified in this part. At most 16 bits can be passed to emit_bits
|
| +- * in one call, and we never retain more than 7 bits in put_buffer
|
| +- * between calls, so 24 bits are sufficient.
|
| ++/* These macros perform the same task as the emit_bits() function in the
|
| ++ * original libjpeg code. In addition to reducing overhead by explicitly
|
| ++ * inlining the code, additional performance is achieved by taking into
|
| ++ * account the size of the bit buffer and waiting until it is almost full
|
| ++ * before emptying it. This mostly benefits 64-bit platforms, since 6
|
| ++ * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
|
| + */
|
| +
|
| +-/***************************************************************/
|
| +-
|
| +-#define EMIT_BYTE() { \
|
| +- if (0xFF == (*buffer++ = (unsigned char)(put_buffer >> (put_bits -= 8)))) \
|
| +- *buffer++ = 0; \
|
| ++#define EMIT_BYTE() { \
|
| ++ JOCTET c; \
|
| ++ put_bits -= 8; \
|
| ++ c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
|
| ++ *buffer++ = c; \
|
| ++ if (c == 0xFF) /* need to stuff a zero byte? */ \
|
| ++ *buffer++ = 0; \
|
| + }
|
| +
|
| +-/***************************************************************/
|
| ++#define PUT_BITS(code, size) { \
|
| ++ put_bits += size; \
|
| ++ put_buffer = (put_buffer << size) | code; \
|
| ++}
|
| +
|
| +-#define DUMP_BITS_(code, size) { \
|
| +- put_bits += size; \
|
| +- put_buffer = (put_buffer << size) | code; \
|
| +- if (put_bits > 7) \
|
| +- while(put_bits > 7) \
|
| +- EMIT_BYTE() \
|
| +- }
|
| +-
|
| +-/***************************************************************/
|
| +-
|
| +-#define CHECKBUF15() { \
|
| +- if (put_bits > 15) { \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- } \
|
| ++#define CHECKBUF15() { \
|
| ++ if (put_bits > 15) { \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ } \
|
| + }
|
| +
|
| +-#define CHECKBUF47() { \
|
| +- if (put_bits > 47) { \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- } \
|
| ++#define CHECKBUF31() { \
|
| ++ if (put_bits > 31) { \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ } \
|
| + }
|
| +
|
| +-#define CHECKBUF31() { \
|
| +- if (put_bits > 31) { \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- EMIT_BYTE() \
|
| +- } \
|
| ++#define CHECKBUF47() { \
|
| ++ if (put_bits > 47) { \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ EMIT_BYTE() \
|
| ++ } \
|
| + }
|
| +
|
| +-/***************************************************************/
|
| ++#if __WORDSIZE==64 || defined(_WIN64)
|
| +
|
| +-#define DUMP_BITS_NOCHECK(code, size) { \
|
| +- put_bits += size; \
|
| +- put_buffer = (put_buffer << size) | code; \
|
| +- }
|
| ++#define EMIT_BITS(code, size) { \
|
| ++ CHECKBUF47() \
|
| ++ PUT_BITS(code, size) \
|
| ++}
|
| +
|
| +-#if __WORDSIZE==64
|
| +-
|
| +-#define DUMP_BITS(code, size) { \
|
| +- CHECKBUF47() \
|
| +- put_bits += size; \
|
| +- put_buffer = (put_buffer << size) | code; \
|
| ++#define EMIT_CODE(code, size) { \
|
| ++ temp2 &= (((INT32) 1)<<nbits) - 1; \
|
| ++ CHECKBUF31() \
|
| ++ PUT_BITS(code, size) \
|
| ++ PUT_BITS(temp2, nbits) \
|
| + }
|
| +
|
| + #else
|
| +
|
| +-#define DUMP_BITS(code, size) { \
|
| +- put_bits += size; \
|
| +- put_buffer = (put_buffer << size) | code; \
|
| +- CHECKBUF15() \
|
| +- }
|
| ++#define EMIT_BITS(code, size) { \
|
| ++ PUT_BITS(code, size) \
|
| ++ CHECKBUF15() \
|
| ++}
|
| +
|
| +-#endif
|
| +-
|
| +-/***************************************************************/
|
| +-
|
| +-#define DUMP_SINGLE_VALUE(ht, codevalue) { \
|
| +- size = ht->ehufsi[codevalue]; \
|
| +- code = ht->ehufco[codevalue]; \
|
| +- \
|
| +- DUMP_BITS(code, size) \
|
| ++#define EMIT_CODE(code, size) { \
|
| ++ temp2 &= (((INT32) 1)<<nbits) - 1; \
|
| ++ PUT_BITS(code, size) \
|
| ++ CHECKBUF15() \
|
| ++ PUT_BITS(temp2, nbits) \
|
| ++ CHECKBUF15() \
|
| + }
|
| +
|
| +-/***************************************************************/
|
| +-
|
| +-#define DUMP_VALUE_SLOW(ht, codevalue, t, nbits) { \
|
| +- size = ht->ehufsi[codevalue]; \
|
| +- code = ht->ehufco[codevalue]; \
|
| +- t &= ~(-1 << nbits); \
|
| +- DUMP_BITS_NOCHECK(code, size) \
|
| +- CHECKBUF15() \
|
| +- DUMP_BITS_NOCHECK(t, nbits) \
|
| +- CHECKBUF15() \
|
| +- }
|
| +-
|
| +-int _max=0;
|
| +-
|
| +-#if __WORDSIZE==64
|
| +-
|
| +-#define DUMP_VALUE(ht, codevalue, t, nbits) { \
|
| +- size = ht->ehufsi[codevalue]; \
|
| +- code = ht->ehufco[codevalue]; \
|
| +- t &= ~(-1 << nbits); \
|
| +- CHECKBUF31() \
|
| +- DUMP_BITS_NOCHECK(code, size) \
|
| +- DUMP_BITS_NOCHECK(t, nbits) \
|
| +- }
|
| +-
|
| +-#else
|
| +-
|
| +-#define DUMP_VALUE(ht, codevalue, t, nbits) { \
|
| +- size = ht->ehufsi[codevalue]; \
|
| +- code = ht->ehufco[codevalue]; \
|
| +- t &= ~(-1 << nbits); \
|
| +- DUMP_BITS_NOCHECK(code, size) \
|
| +- CHECKBUF15() \
|
| +- DUMP_BITS_NOCHECK(t, nbits) \
|
| +- CHECKBUF15() \
|
| +- }
|
| +-
|
| + #endif
|
| +
|
| +-/***************************************************************/
|
| +
|
| + #define BUFSIZE (DCTSIZE2 * 2)
|
| +
|
| +-#define LOAD_BUFFER() { \
|
| +- if (state->free_in_buffer < BUFSIZE) { \
|
| +- localbuf = 1; \
|
| +- buffer = _buffer; \
|
| +- } \
|
| +- else buffer = state->next_output_byte; \
|
| ++#define LOAD_BUFFER() { \
|
| ++ if (state->free_in_buffer < BUFSIZE) { \
|
| ++ localbuf = 1; \
|
| ++ buffer = _buffer; \
|
| ++ } \
|
| ++ else buffer = state->next_output_byte; \
|
| + }
|
| +
|
| +-#define STORE_BUFFER() { \
|
| +- if (localbuf) { \
|
| +- bytes = buffer - _buffer; \
|
| +- buffer = _buffer; \
|
| +- while (bytes > 0) { \
|
| +- bytestocopy = min(bytes, state->free_in_buffer); \
|
| +- MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
|
| +- state->next_output_byte += bytestocopy; \
|
| +- buffer += bytestocopy; \
|
| +- state->free_in_buffer -= bytestocopy; \
|
| +- if (state->free_in_buffer == 0) \
|
| +- if (! dump_buffer(state)) return FALSE; \
|
| +- bytes -= bytestocopy; \
|
| +- } \
|
| +- } \
|
| +- else { \
|
| +- state->free_in_buffer -= (buffer - state->next_output_byte); \
|
| +- state->next_output_byte = buffer; \
|
| +- } \
|
| ++#define STORE_BUFFER() { \
|
| ++ if (localbuf) { \
|
| ++ bytes = buffer - _buffer; \
|
| ++ buffer = _buffer; \
|
| ++ while (bytes > 0) { \
|
| ++ bytestocopy = min(bytes, state->free_in_buffer); \
|
| ++ MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
|
| ++ state->next_output_byte += bytestocopy; \
|
| ++ buffer += bytestocopy; \
|
| ++ state->free_in_buffer -= bytestocopy; \
|
| ++ if (state->free_in_buffer == 0) \
|
| ++ if (! dump_buffer(state)) return FALSE; \
|
| ++ bytes -= bytestocopy; \
|
| ++ } \
|
| ++ } \
|
| ++ else { \
|
| ++ state->free_in_buffer -= (buffer - state->next_output_byte); \
|
| ++ state->next_output_byte = buffer; \
|
| ++ } \
|
| + }
|
| +
|
| +-/***************************************************************/
|
| +
|
| + LOCAL(boolean)
|
| + flush_bits (working_state * state)
|
| + {
|
| +- unsigned char _buffer[BUFSIZE], *buffer;
|
| +- long put_buffer; int put_bits;
|
| +- int bytes, bytestocopy, localbuf = 0;
|
| ++ JOCTET _buffer[BUFSIZE], *buffer;
|
| ++ size_t put_buffer; int put_bits;
|
| ++ size_t bytes, bytestocopy; int localbuf = 0;
|
| +
|
| + put_buffer = state->cur.put_buffer;
|
| + put_bits = state->cur.put_bits;
|
| + LOAD_BUFFER()
|
| +
|
| +- DUMP_BITS_(0x7F, 7)
|
| ++ /* fill any partial byte with ones */
|
| ++ PUT_BITS(0x7F, 7)
|
| ++ while (put_bits >= 8) EMIT_BYTE()
|
| +
|
| + state->cur.put_buffer = 0; /* and reset bit-buffer to empty */
|
| + state->cur.put_bits = 0;
|
| +@@ -505,6 +475,7 @@
|
| + return TRUE;
|
| + }
|
| +
|
| ++
|
| + /* Encode a single block's worth of coefficients */
|
| +
|
| + LOCAL(boolean)
|
| +@@ -511,13 +482,13 @@
|
| + encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
|
| + c_derived_tbl *dctbl, c_derived_tbl *actbl)
|
| + {
|
| +- int temp, temp2;
|
| ++ int temp, temp2, temp3;
|
| + int nbits;
|
| +- int r, sflag, size, code;
|
| +- unsigned char _buffer[BUFSIZE], *buffer;
|
| +- long put_buffer; int put_bits;
|
| ++ int r, code, size;
|
| ++ JOCTET _buffer[BUFSIZE], *buffer;
|
| ++ size_t put_buffer; int put_bits;
|
| + int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
|
| +- int bytes, bytestocopy, localbuf = 0;
|
| ++ size_t bytes, bytestocopy; int localbuf = 0;
|
| +
|
| + put_buffer = state->cur.put_buffer;
|
| + put_bits = state->cur.put_bits;
|
| +@@ -527,50 +498,88 @@
|
| +
|
| + temp = temp2 = block[0] - last_dc_val;
|
| +
|
| +- sflag = temp >> 31;
|
| +- temp -= ((temp + temp) & sflag);
|
| +- temp2 += sflag;
|
| +- nbits = jpeg_first_bit_table[temp];
|
| +- DUMP_VALUE_SLOW(dctbl, nbits, temp2, nbits)
|
| ++ /* This is a well-known technique for obtaining the absolute value without a
|
| ++ * branch. It is derived from an assembly language technique presented in
|
| ++ * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
|
| ++ * Agner Fog.
|
| ++ */
|
| ++ temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
|
| ++ temp ^= temp3;
|
| ++ temp -= temp3;
|
| +
|
| ++ /* For a negative input, want temp2 = bitwise complement of abs(input) */
|
| ++ /* This code assumes we are on a two's complement machine */
|
| ++ temp2 += temp3;
|
| ++
|
| ++ /* Find the number of bits needed for the magnitude of the coefficient */
|
| ++ nbits = JPEG_NBITS(temp);
|
| ++
|
| ++ /* Emit the Huffman-coded symbol for the number of bits */
|
| ++ code = dctbl->ehufco[nbits];
|
| ++ size = dctbl->ehufsi[nbits];
|
| ++ PUT_BITS(code, size)
|
| ++ CHECKBUF15()
|
| ++
|
| ++ /* Mask off any extra bits in code */
|
| ++ temp2 &= (((INT32) 1)<<nbits) - 1;
|
| ++
|
| ++ /* Emit that number of bits of the value, if positive, */
|
| ++ /* or the complement of its magnitude, if negative. */
|
| ++ PUT_BITS(temp2, nbits)
|
| ++ CHECKBUF15()
|
| ++
|
| + /* Encode the AC coefficients per section F.1.2.2 */
|
| +
|
| + r = 0; /* r = run length of zeros */
|
| +
|
| +-#define innerloop(order) { \
|
| +- temp2 = *(JCOEF*)((unsigned char*)block + order); \
|
| +- if(temp2 == 0) r++; \
|
| +- else { \
|
| +- temp = (JCOEF)temp2; \
|
| +- sflag = temp >> 31; \
|
| +- temp = (temp ^ sflag) - sflag; \
|
| +- temp2 += sflag; \
|
| +- nbits = jpeg_first_bit_table[temp]; \
|
| +- for(; r > 15; r -= 16) DUMP_BITS(code_0xf0, size_0xf0) \
|
| +- sflag = (r << 4) + nbits; \
|
| +- DUMP_VALUE(actbl, sflag, temp2, nbits) \
|
| ++/* Manually unroll the k loop to eliminate the counter variable. This
|
| ++ * improves performance greatly on systems with a limited number of
|
| ++ * registers (such as x86.)
|
| ++ */
|
| ++#define kloop(jpeg_natural_order_of_k) { \
|
| ++ if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
|
| ++ r++; \
|
| ++ } else { \
|
| ++ temp2 = temp; \
|
| ++ /* Branch-less absolute value, bitwise complement, etc., same as above */ \
|
| ++ temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
|
| ++ temp ^= temp3; \
|
| ++ temp -= temp3; \
|
| ++ temp2 += temp3; \
|
| ++ nbits = JPEG_NBITS_NONZERO(temp); \
|
| ++ /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
|
| ++ while (r > 15) { \
|
| ++ EMIT_BITS(code_0xf0, size_0xf0) \
|
| ++ r -= 16; \
|
| ++ } \
|
| ++ /* Emit Huffman symbol for run length / number of bits */ \
|
| ++ temp3 = (r << 4) + nbits; \
|
| ++ code = actbl->ehufco[temp3]; \
|
| ++ size = actbl->ehufsi[temp3]; \
|
| ++ EMIT_CODE(code, size) \
|
| + r = 0; \
|
| +- }}
|
| ++ } \
|
| ++}
|
| +
|
| +- innerloop(2*1); innerloop(2*8); innerloop(2*16); innerloop(2*9);
|
| +- innerloop(2*2); innerloop(2*3); innerloop(2*10); innerloop(2*17);
|
| +- innerloop(2*24); innerloop(2*32); innerloop(2*25); innerloop(2*18);
|
| +- innerloop(2*11); innerloop(2*4); innerloop(2*5); innerloop(2*12);
|
| +- innerloop(2*19); innerloop(2*26); innerloop(2*33); innerloop(2*40);
|
| +- innerloop(2*48); innerloop(2*41); innerloop(2*34); innerloop(2*27);
|
| +- innerloop(2*20); innerloop(2*13); innerloop(2*6); innerloop(2*7);
|
| +- innerloop(2*14); innerloop(2*21); innerloop(2*28); innerloop(2*35);
|
| +- innerloop(2*42); innerloop(2*49); innerloop(2*56); innerloop(2*57);
|
| +- innerloop(2*50); innerloop(2*43); innerloop(2*36); innerloop(2*29);
|
| +- innerloop(2*22); innerloop(2*15); innerloop(2*23); innerloop(2*30);
|
| +- innerloop(2*37); innerloop(2*44); innerloop(2*51); innerloop(2*58);
|
| +- innerloop(2*59); innerloop(2*52); innerloop(2*45); innerloop(2*38);
|
| +- innerloop(2*31); innerloop(2*39); innerloop(2*46); innerloop(2*53);
|
| +- innerloop(2*60); innerloop(2*61); innerloop(2*54); innerloop(2*47);
|
| +- innerloop(2*55); innerloop(2*62); innerloop(2*63);
|
| ++ /* One iteration for each value in jpeg_natural_order[] */
|
| ++ kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);
|
| ++ kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);
|
| ++ kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);
|
| ++ kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);
|
| ++ kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);
|
| ++ kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);
|
| ++ kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);
|
| ++ kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);
|
| ++ kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);
|
| ++ kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);
|
| ++ kloop(55); kloop(62); kloop(63);
|
| +
|
| + /* If the last coef(s) were zero, emit an end-of-block code */
|
| +- if (r > 0) DUMP_SINGLE_VALUE(actbl, 0x0)
|
| ++ if (r > 0) {
|
| ++ code = actbl->ehufco[0];
|
| ++ size = actbl->ehufsi[0];
|
| ++ EMIT_BITS(code, size)
|
| ++ }
|
| +
|
| + state->cur.put_buffer = put_buffer;
|
| + state->cur.put_bits = put_bits;
|
| +Index: jcinit.c
|
| +===================================================================
|
| +--- jcinit.c (revision 829)
|
| ++++ jcinit.c (working copy)
|
| +@@ -42,7 +42,11 @@
|
| + jinit_forward_dct(cinfo);
|
| + /* Entropy encoding: either Huffman or arithmetic coding. */
|
| + if (cinfo->arith_code) {
|
| ++#ifdef C_ARITH_CODING_SUPPORTED
|
| ++ jinit_arith_encoder(cinfo);
|
| ++#else
|
| + ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
| ++#endif
|
| + } else {
|
| + if (cinfo->progressive_mode) {
|
| + #ifdef C_PROGRESSIVE_SUPPORTED
|
| +Index: jcmainct.c
|
| +===================================================================
|
| +--- jcmainct.c (revision 829)
|
| ++++ jcmainct.c (working copy)
|
| +@@ -68,32 +68,32 @@
|
| + METHODDEF(void)
|
| + start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| +
|
| + /* Do nothing in raw-data mode. */
|
| + if (cinfo->raw_data_in)
|
| + return;
|
| +
|
| +- main->cur_iMCU_row = 0; /* initialize counters */
|
| +- main->rowgroup_ctr = 0;
|
| +- main->suspended = FALSE;
|
| +- main->pass_mode = pass_mode; /* save mode for use by process_data */
|
| ++ main_ptr->cur_iMCU_row = 0; /* initialize counters */
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| ++ main_ptr->suspended = FALSE;
|
| ++ main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */
|
| +
|
| + switch (pass_mode) {
|
| + case JBUF_PASS_THRU:
|
| + #ifdef FULL_MAIN_BUFFER_SUPPORTED
|
| +- if (main->whole_image[0] != NULL)
|
| ++ if (main_ptr->whole_image[0] != NULL)
|
| + ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
| + #endif
|
| +- main->pub.process_data = process_data_simple_main;
|
| ++ main_ptr->pub.process_data = process_data_simple_main;
|
| + break;
|
| + #ifdef FULL_MAIN_BUFFER_SUPPORTED
|
| + case JBUF_SAVE_SOURCE:
|
| + case JBUF_CRANK_DEST:
|
| + case JBUF_SAVE_AND_PASS:
|
| +- if (main->whole_image[0] == NULL)
|
| ++ if (main_ptr->whole_image[0] == NULL)
|
| + ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
| +- main->pub.process_data = process_data_buffer_main;
|
| ++ main_ptr->pub.process_data = process_data_buffer_main;
|
| + break;
|
| + #endif
|
| + default:
|
| +@@ -114,14 +114,14 @@
|
| + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
|
| + JDIMENSION in_rows_avail)
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| +
|
| +- while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
|
| ++ while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
|
| + /* Read input data if we haven't filled the main buffer yet */
|
| +- if (main->rowgroup_ctr < DCTSIZE)
|
| ++ if (main_ptr->rowgroup_ctr < DCTSIZE)
|
| + (*cinfo->prep->pre_process_data) (cinfo,
|
| + input_buf, in_row_ctr, in_rows_avail,
|
| +- main->buffer, &main->rowgroup_ctr,
|
| ++ main_ptr->buffer, &main_ptr->rowgroup_ctr,
|
| + (JDIMENSION) DCTSIZE);
|
| +
|
| + /* If we don't have a full iMCU row buffered, return to application for
|
| +@@ -128,11 +128,11 @@
|
| + * more data. Note that preprocessor will always pad to fill the iMCU row
|
| + * at the bottom of the image.
|
| + */
|
| +- if (main->rowgroup_ctr != DCTSIZE)
|
| ++ if (main_ptr->rowgroup_ctr != DCTSIZE)
|
| + return;
|
| +
|
| + /* Send the completed row to the compressor */
|
| +- if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
|
| ++ if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
|
| + /* If compressor did not consume the whole row, then we must need to
|
| + * suspend processing and return to the application. In this situation
|
| + * we pretend we didn't yet consume the last input row; otherwise, if
|
| +@@ -139,9 +139,9 @@
|
| + * it happened to be the last row of the image, the application would
|
| + * think we were done.
|
| + */
|
| +- if (! main->suspended) {
|
| ++ if (! main_ptr->suspended) {
|
| + (*in_row_ctr)--;
|
| +- main->suspended = TRUE;
|
| ++ main_ptr->suspended = TRUE;
|
| + }
|
| + return;
|
| + }
|
| +@@ -148,12 +148,12 @@
|
| + /* We did finish the row. Undo our little suspension hack if a previous
|
| + * call suspended; then mark the main buffer empty.
|
| + */
|
| +- if (main->suspended) {
|
| ++ if (main_ptr->suspended) {
|
| + (*in_row_ctr)++;
|
| +- main->suspended = FALSE;
|
| ++ main_ptr->suspended = FALSE;
|
| + }
|
| +- main->rowgroup_ctr = 0;
|
| +- main->cur_iMCU_row++;
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| ++ main_ptr->cur_iMCU_row++;
|
| + }
|
| + }
|
| +
|
| +@@ -170,25 +170,25 @@
|
| + JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
|
| + JDIMENSION in_rows_avail)
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| + int ci;
|
| + jpeg_component_info *compptr;
|
| +- boolean writing = (main->pass_mode != JBUF_CRANK_DEST);
|
| ++ boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST);
|
| +
|
| +- while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
|
| ++ while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
|
| + /* Realign the virtual buffers if at the start of an iMCU row. */
|
| +- if (main->rowgroup_ctr == 0) {
|
| ++ if (main_ptr->rowgroup_ctr == 0) {
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- main->buffer[ci] = (*cinfo->mem->access_virt_sarray)
|
| +- ((j_common_ptr) cinfo, main->whole_image[ci],
|
| +- main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
|
| ++ main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray)
|
| ++ ((j_common_ptr) cinfo, main_ptr->whole_image[ci],
|
| ++ main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
|
| + (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
|
| + }
|
| + /* In a read pass, pretend we just read some source data. */
|
| + if (! writing) {
|
| + *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
|
| +- main->rowgroup_ctr = DCTSIZE;
|
| ++ main_ptr->rowgroup_ctr = DCTSIZE;
|
| + }
|
| + }
|
| +
|
| +@@ -197,16 +197,16 @@
|
| + if (writing) {
|
| + (*cinfo->prep->pre_process_data) (cinfo,
|
| + input_buf, in_row_ctr, in_rows_avail,
|
| +- main->buffer, &main->rowgroup_ctr,
|
| ++ main_ptr->buffer, &main_ptr->rowgroup_ctr,
|
| + (JDIMENSION) DCTSIZE);
|
| + /* Return to application if we need more data to fill the iMCU row. */
|
| +- if (main->rowgroup_ctr < DCTSIZE)
|
| ++ if (main_ptr->rowgroup_ctr < DCTSIZE)
|
| + return;
|
| + }
|
| +
|
| + /* Emit data, unless this is a sink-only pass. */
|
| +- if (main->pass_mode != JBUF_SAVE_SOURCE) {
|
| +- if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
|
| ++ if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) {
|
| ++ if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
|
| + /* If compressor did not consume the whole row, then we must need to
|
| + * suspend processing and return to the application. In this situation
|
| + * we pretend we didn't yet consume the last input row; otherwise, if
|
| +@@ -213,9 +213,9 @@
|
| + * it happened to be the last row of the image, the application would
|
| + * think we were done.
|
| + */
|
| +- if (! main->suspended) {
|
| ++ if (! main_ptr->suspended) {
|
| + (*in_row_ctr)--;
|
| +- main->suspended = TRUE;
|
| ++ main_ptr->suspended = TRUE;
|
| + }
|
| + return;
|
| + }
|
| +@@ -222,15 +222,15 @@
|
| + /* We did finish the row. Undo our little suspension hack if a previous
|
| + * call suspended; then mark the main buffer empty.
|
| + */
|
| +- if (main->suspended) {
|
| ++ if (main_ptr->suspended) {
|
| + (*in_row_ctr)++;
|
| +- main->suspended = FALSE;
|
| ++ main_ptr->suspended = FALSE;
|
| + }
|
| + }
|
| +
|
| + /* If get here, we are done with this iMCU row. Mark buffer empty. */
|
| +- main->rowgroup_ctr = 0;
|
| +- main->cur_iMCU_row++;
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| ++ main_ptr->cur_iMCU_row++;
|
| + }
|
| + }
|
| +
|
| +@@ -244,15 +244,15 @@
|
| + GLOBAL(void)
|
| + jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
|
| + {
|
| +- my_main_ptr main;
|
| ++ my_main_ptr main_ptr;
|
| + int ci;
|
| + jpeg_component_info *compptr;
|
| +
|
| +- main = (my_main_ptr)
|
| ++ main_ptr = (my_main_ptr)
|
| + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| + SIZEOF(my_main_controller));
|
| +- cinfo->main = (struct jpeg_c_main_controller *) main;
|
| +- main->pub.start_pass = start_pass_main;
|
| ++ cinfo->main = (struct jpeg_c_main_controller *) main_ptr;
|
| ++ main_ptr->pub.start_pass = start_pass_main;
|
| +
|
| + /* We don't need to create a buffer in raw-data mode. */
|
| + if (cinfo->raw_data_in)
|
| +@@ -267,7 +267,7 @@
|
| + /* Note we pad the bottom to a multiple of the iMCU height */
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- main->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
|
| ++ main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
|
| + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
|
| + compptr->width_in_blocks * DCTSIZE,
|
| + (JDIMENSION) jround_up((long) compptr->height_in_blocks,
|
| +@@ -279,12 +279,12 @@
|
| + #endif
|
| + } else {
|
| + #ifdef FULL_MAIN_BUFFER_SUPPORTED
|
| +- main->whole_image[0] = NULL; /* flag for no virtual arrays */
|
| ++ main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */
|
| + #endif
|
| + /* Allocate a strip buffer for each component */
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- main->buffer[ci] = (*cinfo->mem->alloc_sarray)
|
| ++ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
|
| + ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| + compptr->width_in_blocks * DCTSIZE,
|
| + (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
|
| +Index: jcmarker.c
|
| +===================================================================
|
| +--- jcmarker.c (revision 829)
|
| ++++ jcmarker.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * jcmarker.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1998, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2003-2010 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains routines to write JPEG datastream markers.
|
| +@@ -11,6 +14,7 @@
|
| + #define JPEG_INTERNALS
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| + typedef enum { /* JPEG marker codes */
|
| +@@ -18,24 +22,24 @@
|
| + M_SOF1 = 0xc1,
|
| + M_SOF2 = 0xc2,
|
| + M_SOF3 = 0xc3,
|
| +-
|
| ++
|
| + M_SOF5 = 0xc5,
|
| + M_SOF6 = 0xc6,
|
| + M_SOF7 = 0xc7,
|
| +-
|
| ++
|
| + M_JPG = 0xc8,
|
| + M_SOF9 = 0xc9,
|
| + M_SOF10 = 0xca,
|
| + M_SOF11 = 0xcb,
|
| +-
|
| ++
|
| + M_SOF13 = 0xcd,
|
| + M_SOF14 = 0xce,
|
| + M_SOF15 = 0xcf,
|
| +-
|
| ++
|
| + M_DHT = 0xc4,
|
| +-
|
| ++
|
| + M_DAC = 0xcc,
|
| +-
|
| ++
|
| + M_RST0 = 0xd0,
|
| + M_RST1 = 0xd1,
|
| + M_RST2 = 0xd2,
|
| +@@ -44,7 +48,7 @@
|
| + M_RST5 = 0xd5,
|
| + M_RST6 = 0xd6,
|
| + M_RST7 = 0xd7,
|
| +-
|
| ++
|
| + M_SOI = 0xd8,
|
| + M_EOI = 0xd9,
|
| + M_SOS = 0xda,
|
| +@@ -53,7 +57,7 @@
|
| + M_DRI = 0xdd,
|
| + M_DHP = 0xde,
|
| + M_EXP = 0xdf,
|
| +-
|
| ++
|
| + M_APP0 = 0xe0,
|
| + M_APP1 = 0xe1,
|
| + M_APP2 = 0xe2,
|
| +@@ -70,13 +74,13 @@
|
| + M_APP13 = 0xed,
|
| + M_APP14 = 0xee,
|
| + M_APP15 = 0xef,
|
| +-
|
| ++
|
| + M_JPG0 = 0xf0,
|
| + M_JPG13 = 0xfd,
|
| + M_COM = 0xfe,
|
| +-
|
| ++
|
| + M_TEM = 0x01,
|
| +-
|
| ++
|
| + M_ERROR = 0x100
|
| + } JPEG_MARKER;
|
| +
|
| +@@ -229,33 +233,39 @@
|
| + char ac_in_use[NUM_ARITH_TBLS];
|
| + int length, i;
|
| + jpeg_component_info *compptr;
|
| +-
|
| ++
|
| + for (i = 0; i < NUM_ARITH_TBLS; i++)
|
| + dc_in_use[i] = ac_in_use[i] = 0;
|
| +-
|
| ++
|
| + for (i = 0; i < cinfo->comps_in_scan; i++) {
|
| + compptr = cinfo->cur_comp_info[i];
|
| +- dc_in_use[compptr->dc_tbl_no] = 1;
|
| +- ac_in_use[compptr->ac_tbl_no] = 1;
|
| ++ /* DC needs no table for refinement scan */
|
| ++ if (cinfo->Ss == 0 && cinfo->Ah == 0)
|
| ++ dc_in_use[compptr->dc_tbl_no] = 1;
|
| ++ /* AC needs no table when not present */
|
| ++ if (cinfo->Se)
|
| ++ ac_in_use[compptr->ac_tbl_no] = 1;
|
| + }
|
| +-
|
| ++
|
| + length = 0;
|
| + for (i = 0; i < NUM_ARITH_TBLS; i++)
|
| + length += dc_in_use[i] + ac_in_use[i];
|
| +-
|
| +- emit_marker(cinfo, M_DAC);
|
| +-
|
| +- emit_2bytes(cinfo, length*2 + 2);
|
| +-
|
| +- for (i = 0; i < NUM_ARITH_TBLS; i++) {
|
| +- if (dc_in_use[i]) {
|
| +- emit_byte(cinfo, i);
|
| +- emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
|
| ++
|
| ++ if (length) {
|
| ++ emit_marker(cinfo, M_DAC);
|
| ++
|
| ++ emit_2bytes(cinfo, length*2 + 2);
|
| ++
|
| ++ for (i = 0; i < NUM_ARITH_TBLS; i++) {
|
| ++ if (dc_in_use[i]) {
|
| ++ emit_byte(cinfo, i);
|
| ++ emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
|
| ++ }
|
| ++ if (ac_in_use[i]) {
|
| ++ emit_byte(cinfo, i + 0x10);
|
| ++ emit_byte(cinfo, cinfo->arith_ac_K[i]);
|
| ++ }
|
| + }
|
| +- if (ac_in_use[i]) {
|
| +- emit_byte(cinfo, i + 0x10);
|
| +- emit_byte(cinfo, cinfo->arith_ac_K[i]);
|
| +- }
|
| + }
|
| + #endif /* C_ARITH_CODING_SUPPORTED */
|
| + }
|
| +@@ -285,13 +295,13 @@
|
| + emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
|
| +
|
| + /* Make sure image isn't bigger than SOF field can handle */
|
| +- if ((long) cinfo->image_height > 65535L ||
|
| +- (long) cinfo->image_width > 65535L)
|
| ++ if ((long) cinfo->_jpeg_height > 65535L ||
|
| ++ (long) cinfo->_jpeg_width > 65535L)
|
| + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
|
| +
|
| + emit_byte(cinfo, cinfo->data_precision);
|
| +- emit_2bytes(cinfo, (int) cinfo->image_height);
|
| +- emit_2bytes(cinfo, (int) cinfo->image_width);
|
| ++ emit_2bytes(cinfo, (int) cinfo->_jpeg_height);
|
| ++ emit_2bytes(cinfo, (int) cinfo->_jpeg_width);
|
| +
|
| + emit_byte(cinfo, cinfo->num_components);
|
| +
|
| +@@ -320,22 +330,16 @@
|
| + for (i = 0; i < cinfo->comps_in_scan; i++) {
|
| + compptr = cinfo->cur_comp_info[i];
|
| + emit_byte(cinfo, compptr->component_id);
|
| +- td = compptr->dc_tbl_no;
|
| +- ta = compptr->ac_tbl_no;
|
| +- if (cinfo->progressive_mode) {
|
| +- /* Progressive mode: only DC or only AC tables are used in one scan;
|
| +- * furthermore, Huffman coding of DC refinement uses no table at all.
|
| +- * We emit 0 for unused field(s); this is recommended by the P&M text
|
| +- * but does not seem to be specified in the standard.
|
| +- */
|
| +- if (cinfo->Ss == 0) {
|
| +- ta = 0; /* DC scan */
|
| +- if (cinfo->Ah != 0 && !cinfo->arith_code)
|
| +- td = 0; /* no DC table either */
|
| +- } else {
|
| +- td = 0; /* AC scan */
|
| +- }
|
| +- }
|
| ++
|
| ++ /* We emit 0 for unused field(s); this is recommended by the P&M text
|
| ++ * but does not seem to be specified in the standard.
|
| ++ */
|
| ++
|
| ++ /* DC needs no table for refinement scan */
|
| ++ td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
|
| ++ /* AC needs no table when not present */
|
| ++ ta = cinfo->Se ? compptr->ac_tbl_no : 0;
|
| ++
|
| + emit_byte(cinfo, (td << 4) + ta);
|
| + }
|
| +
|
| +@@ -529,7 +533,10 @@
|
| +
|
| + /* Emit the proper SOF marker */
|
| + if (cinfo->arith_code) {
|
| +- emit_sof(cinfo, M_SOF9); /* SOF code for arithmetic coding */
|
| ++ if (cinfo->progressive_mode)
|
| ++ emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
|
| ++ else
|
| ++ emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */
|
| + } else {
|
| + if (cinfo->progressive_mode)
|
| + emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */
|
| +@@ -566,19 +573,12 @@
|
| + */
|
| + for (i = 0; i < cinfo->comps_in_scan; i++) {
|
| + compptr = cinfo->cur_comp_info[i];
|
| +- if (cinfo->progressive_mode) {
|
| +- /* Progressive mode: only DC or only AC tables are used in one scan */
|
| +- if (cinfo->Ss == 0) {
|
| +- if (cinfo->Ah == 0) /* DC needs no table for refinement scan */
|
| +- emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
|
| +- } else {
|
| +- emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
|
| +- }
|
| +- } else {
|
| +- /* Sequential mode: need both DC and AC tables */
|
| ++ /* DC needs no table for refinement scan */
|
| ++ if (cinfo->Ss == 0 && cinfo->Ah == 0)
|
| + emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
|
| ++ /* AC needs no table when not present */
|
| ++ if (cinfo->Se)
|
| + emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
|
| +- }
|
| + }
|
| + }
|
| +
|
| +Index: jcmaster.c
|
| +===================================================================
|
| +--- jcmaster.c (revision 829)
|
| ++++ jcmaster.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * jcmaster.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2003-2010 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains master control logic for the JPEG compressor.
|
| +@@ -14,6 +17,7 @@
|
| + #define JPEG_INTERNALS
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| + /* Private state */
|
| +@@ -42,8 +46,28 @@
|
| + * Support routines that do various essential calculations.
|
| + */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++/*
|
| ++ * Compute JPEG image dimensions and related values.
|
| ++ * NOTE: this is exported for possible use by application.
|
| ++ * Hence it mustn't do anything that can't be done twice.
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
|
| ++/* Do computations that are needed before master selection phase */
|
| ++{
|
| ++ /* Hardwire it to "no scaling" */
|
| ++ cinfo->jpeg_width = cinfo->image_width;
|
| ++ cinfo->jpeg_height = cinfo->image_height;
|
| ++ cinfo->min_DCT_h_scaled_size = DCTSIZE;
|
| ++ cinfo->min_DCT_v_scaled_size = DCTSIZE;
|
| ++}
|
| ++#endif
|
| ++
|
| ++
|
| + LOCAL(void)
|
| +-initial_setup (j_compress_ptr cinfo)
|
| ++initial_setup (j_compress_ptr cinfo, boolean transcode_only)
|
| + /* Do computations that are needed before master selection phase */
|
| + {
|
| + int ci;
|
| +@@ -51,14 +75,21 @@
|
| + long samplesperrow;
|
| + JDIMENSION jd_samplesperrow;
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++ if (!transcode_only)
|
| ++#endif
|
| ++ jpeg_calc_jpeg_dimensions(cinfo);
|
| ++#endif
|
| ++
|
| + /* Sanity check on image dimensions */
|
| +- if (cinfo->image_height <= 0 || cinfo->image_width <= 0
|
| ++ if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0
|
| + || cinfo->num_components <= 0 || cinfo->input_components <= 0)
|
| + ERREXIT(cinfo, JERR_EMPTY_IMAGE);
|
| +
|
| + /* Make sure image isn't bigger than I can handle */
|
| +- if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
|
| +- (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
|
| ++ if ((long) cinfo->_jpeg_height > (long) JPEG_MAX_DIMENSION ||
|
| ++ (long) cinfo->_jpeg_width > (long) JPEG_MAX_DIMENSION)
|
| + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
|
| +
|
| + /* Width of an input scanline must be representable as JDIMENSION. */
|
| +@@ -96,20 +127,24 @@
|
| + /* Fill in the correct component_index value; don't rely on application */
|
| + compptr->component_index = ci;
|
| + /* For compression, we never do DCT scaling. */
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
|
| ++#else
|
| + compptr->DCT_scaled_size = DCTSIZE;
|
| ++#endif
|
| + /* Size in DCT blocks */
|
| + compptr->width_in_blocks = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
|
| + (long) (cinfo->max_h_samp_factor * DCTSIZE));
|
| + compptr->height_in_blocks = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
|
| + (long) (cinfo->max_v_samp_factor * DCTSIZE));
|
| + /* Size in samples */
|
| + compptr->downsampled_width = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
|
| + (long) cinfo->max_h_samp_factor);
|
| + compptr->downsampled_height = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
|
| + (long) cinfo->max_v_samp_factor);
|
| + /* Mark component needed (this flag isn't actually used for compression) */
|
| + compptr->component_needed = TRUE;
|
| +@@ -119,7 +154,7 @@
|
| + * main controller will call coefficient controller).
|
| + */
|
| + cinfo->total_iMCU_rows = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_height,
|
| + (long) (cinfo->max_v_samp_factor*DCTSIZE));
|
| + }
|
| +
|
| +@@ -347,10 +382,10 @@
|
| +
|
| + /* Overall image size in MCUs */
|
| + cinfo->MCUs_per_row = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_width,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_width,
|
| + (long) (cinfo->max_h_samp_factor*DCTSIZE));
|
| + cinfo->MCU_rows_in_scan = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height,
|
| ++ jdiv_round_up((long) cinfo->_jpeg_height,
|
| + (long) (cinfo->max_v_samp_factor*DCTSIZE));
|
| +
|
| + cinfo->blocks_in_MCU = 0;
|
| +@@ -554,7 +589,7 @@
|
| + master->pub.is_last_pass = FALSE;
|
| +
|
| + /* Validate parameters, determine derived values */
|
| +- initial_setup(cinfo);
|
| ++ initial_setup(cinfo, transcode_only);
|
| +
|
| + if (cinfo->scan_info != NULL) {
|
| + #ifdef C_MULTISCAN_FILES_SUPPORTED
|
| +@@ -567,7 +602,7 @@
|
| + cinfo->num_scans = 1;
|
| + }
|
| +
|
| +- if (cinfo->progressive_mode) /* TEMPORARY HACK ??? */
|
| ++ if (cinfo->progressive_mode && !cinfo->arith_code) /* TEMPORARY HACK ??? */
|
| + cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
|
| +
|
| + /* Initialize my private state */
|
| +Index: jcparam.c
|
| +===================================================================
|
| +--- jcparam.c (revision 829)
|
| ++++ jcparam.c (working copy)
|
| +@@ -1,9 +1,11 @@
|
| + /*
|
| + * jcparam.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1998, Thomas G. Lane.
|
| +- * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2003-2008 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2009-2011, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains optional default-setting code for the JPEG compressor.
|
| +@@ -61,7 +63,50 @@
|
| + }
|
| +
|
| +
|
| ++/* These are the sample quantization tables given in JPEG spec section K.1.
|
| ++ * The spec says that the values given produce "good" quality, and
|
| ++ * when divided by 2, "very good" quality.
|
| ++ */
|
| ++static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
|
| ++ 16, 11, 10, 16, 24, 40, 51, 61,
|
| ++ 12, 12, 14, 19, 26, 58, 60, 55,
|
| ++ 14, 13, 16, 24, 40, 57, 69, 56,
|
| ++ 14, 17, 22, 29, 51, 87, 80, 62,
|
| ++ 18, 22, 37, 56, 68, 109, 103, 77,
|
| ++ 24, 35, 55, 64, 81, 104, 113, 92,
|
| ++ 49, 64, 78, 87, 103, 121, 120, 101,
|
| ++ 72, 92, 95, 98, 112, 100, 103, 99
|
| ++};
|
| ++static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
|
| ++ 17, 18, 24, 47, 99, 99, 99, 99,
|
| ++ 18, 21, 26, 66, 99, 99, 99, 99,
|
| ++ 24, 26, 56, 99, 99, 99, 99, 99,
|
| ++ 47, 66, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99
|
| ++};
|
| ++
|
| ++
|
| ++#if JPEG_LIB_VERSION >= 70
|
| + GLOBAL(void)
|
| ++jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
|
| ++/* Set or change the 'quality' (quantization) setting, using default tables
|
| ++ * and straight percentage-scaling quality scales.
|
| ++ * This entry point allows different scalings for luminance and chrominance.
|
| ++ */
|
| ++{
|
| ++ /* Set up two quantization tables using the specified scaling */
|
| ++ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
|
| ++ cinfo->q_scale_factor[0], force_baseline);
|
| ++ jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
|
| ++ cinfo->q_scale_factor[1], force_baseline);
|
| ++}
|
| ++#endif
|
| ++
|
| ++
|
| ++GLOBAL(void)
|
| + jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
|
| + boolean force_baseline)
|
| + /* Set or change the 'quality' (quantization) setting, using default tables
|
| +@@ -70,31 +115,6 @@
|
| + * applications that insist on a linear percentage scaling.
|
| + */
|
| + {
|
| +- /* These are the sample quantization tables given in JPEG spec section K.1.
|
| +- * The spec says that the values given produce "good" quality, and
|
| +- * when divided by 2, "very good" quality.
|
| +- */
|
| +- static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
|
| +- 16, 11, 10, 16, 24, 40, 51, 61,
|
| +- 12, 12, 14, 19, 26, 58, 60, 55,
|
| +- 14, 13, 16, 24, 40, 57, 69, 56,
|
| +- 14, 17, 22, 29, 51, 87, 80, 62,
|
| +- 18, 22, 37, 56, 68, 109, 103, 77,
|
| +- 24, 35, 55, 64, 81, 104, 113, 92,
|
| +- 49, 64, 78, 87, 103, 121, 120, 101,
|
| +- 72, 92, 95, 98, 112, 100, 103, 99
|
| +- };
|
| +- static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
|
| +- 17, 18, 24, 47, 99, 99, 99, 99,
|
| +- 18, 21, 26, 66, 99, 99, 99, 99,
|
| +- 24, 26, 56, 99, 99, 99, 99, 99,
|
| +- 47, 66, 99, 99, 99, 99, 99, 99,
|
| +- 99, 99, 99, 99, 99, 99, 99, 99,
|
| +- 99, 99, 99, 99, 99, 99, 99, 99,
|
| +- 99, 99, 99, 99, 99, 99, 99, 99,
|
| +- 99, 99, 99, 99, 99, 99, 99, 99
|
| +- };
|
| +-
|
| + /* Set up two quantization tables using the specified scaling */
|
| + jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
|
| + scale_factor, force_baseline);
|
| +@@ -285,6 +305,10 @@
|
| +
|
| + /* Initialize everything not dependent on the color space */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ cinfo->scale_num = 1; /* 1:1 scaling */
|
| ++ cinfo->scale_denom = 1;
|
| ++#endif
|
| + cinfo->data_precision = BITS_IN_JSAMPLE;
|
| + /* Set up two quantization tables using default quality of 75 */
|
| + jpeg_set_quality(cinfo, 75, TRUE);
|
| +@@ -321,6 +345,11 @@
|
| + /* By default, use the simpler non-cosited sampling alignment */
|
| + cinfo->CCIR601_sampling = FALSE;
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ /* By default, apply fancy downsampling */
|
| ++ cinfo->do_fancy_downsampling = TRUE;
|
| ++#endif
|
| ++
|
| + /* No input smoothing */
|
| + cinfo->smoothing_factor = 0;
|
| +
|
| +@@ -370,6 +399,10 @@
|
| + case JCS_EXT_BGRX:
|
| + case JCS_EXT_XBGR:
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_RGBA:
|
| ++ case JCS_EXT_BGRA:
|
| ++ case JCS_EXT_ABGR:
|
| ++ case JCS_EXT_ARGB:
|
| + jpeg_set_colorspace(cinfo, JCS_YCbCr);
|
| + break;
|
| + case JCS_YCbCr:
|
| +Index: jctrans.c
|
| +===================================================================
|
| +--- jctrans.c (revision 829)
|
| ++++ jctrans.c (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * jctrans.c
|
| + *
|
| + * Copyright (C) 1995-1998, Thomas G. Lane.
|
| ++ * Modified 2000-2009 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -76,6 +77,12 @@
|
| + dstinfo->image_height = srcinfo->image_height;
|
| + dstinfo->input_components = srcinfo->num_components;
|
| + dstinfo->in_color_space = srcinfo->jpeg_color_space;
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ dstinfo->jpeg_width = srcinfo->output_width;
|
| ++ dstinfo->jpeg_height = srcinfo->output_height;
|
| ++ dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
|
| ++ dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
|
| ++#endif
|
| + /* Initialize all parameters to default values */
|
| + jpeg_set_defaults(dstinfo);
|
| + /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
|
| +@@ -167,7 +174,11 @@
|
| +
|
| + /* Entropy encoding: either Huffman or arithmetic coding. */
|
| + if (cinfo->arith_code) {
|
| ++#ifdef C_ARITH_CODING_SUPPORTED
|
| ++ jinit_arith_encoder(cinfo);
|
| ++#else
|
| + ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
| ++#endif
|
| + } else {
|
| + if (cinfo->progressive_mode) {
|
| + #ifdef C_PROGRESSIVE_SUPPORTED
|
| +Index: jdapistd.c
|
| +===================================================================
|
| +--- jdapistd.c (revision 829)
|
| ++++ jdapistd.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * jdapistd.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, 2015, D. R. Commander.
|
| ++ * Copyright (C) 2015, Google, Inc.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains application interface code for the decompression half
|
| +@@ -14,9 +17,10 @@
|
| + * whole decompression library into a transcoder.
|
| + */
|
| +
|
| +-#define JPEG_INTERNALS
|
| +-#include "jinclude.h"
|
| +-#include "jpeglib.h"
|
| ++#include "jdmainct.h"
|
| ++#include "jdcoefct.h"
|
| ++#include "jdsample.h"
|
| ++#include "jmemsys.h"
|
| +
|
| +
|
| + /* Forward declarations */
|
| +@@ -176,7 +180,236 @@
|
| + }
|
| +
|
| +
|
| ++
|
| ++/* Dummy color convert function used by jpeg_skip_scanlines() */
|
| ++LOCAL(void)
|
| ++noop_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
| ++ JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
| ++{
|
| ++}
|
| ++
|
| ++
|
| + /*
|
| ++ * In some cases, it is best to call jpeg_read_scanlines() and discard the
|
| ++ * output, rather than skipping the scanlines, because this allows us to
|
| ++ * maintain the internal state of the context-based upsampler. In these cases,
|
| ++ * we set up and tear down a dummy color converter in order to avoid valgrind
|
| ++ * errors and to achieve the best possible performance.
|
| ++ */
|
| ++LOCAL(void)
|
| ++read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines)
|
| ++{
|
| ++ JDIMENSION n;
|
| ++ void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
| ++ JDIMENSION input_row, JSAMPARRAY output_buf,
|
| ++ int num_rows);
|
| ++
|
| ++ color_convert = cinfo->cconvert->color_convert;
|
| ++ cinfo->cconvert->color_convert = noop_convert;
|
| ++
|
| ++ for (n = 0; n < num_lines; n++)
|
| ++ jpeg_read_scanlines(cinfo, NULL, 1);
|
| ++
|
| ++ cinfo->cconvert->color_convert = color_convert;
|
| ++}
|
| ++
|
| ++/*
|
| ++ * Called by jpeg_skip_scanlines(). This partially skips a decompress block by
|
| ++ * incrementing the rowgroup counter.
|
| ++ */
|
| ++
|
| ++LOCAL(void)
|
| ++increment_simple_rowgroup_ctr (j_decompress_ptr cinfo, JDIMENSION rows)
|
| ++{
|
| ++ JDIMENSION rows_left;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| ++
|
| ++ /* Increment the counter to the next row group after the skipped rows. */
|
| ++ main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor;
|
| ++
|
| ++ /* Partially skipping a row group would involve modifying the internal state
|
| ++ * of the upsampler, so read the remaining rows into a dummy buffer instead.
|
| ++ */
|
| ++ rows_left = rows % cinfo->max_v_samp_factor;
|
| ++ cinfo->output_scanline += rows - rows_left;
|
| ++
|
| ++ read_and_discard_scanlines(cinfo, rows_left);
|
| ++}
|
| ++
|
| ++/*
|
| ++ * Skips some scanlines of data from the JPEG decompressor.
|
| ++ *
|
| ++ * The return value will be the number of lines actually skipped. If skipping
|
| ++ * num_lines would move beyond the end of the image, then the actual number of
|
| ++ * lines remaining in the image is returned. Otherwise, the return value will
|
| ++ * be equal to num_lines.
|
| ++ *
|
| ++ * Refer to libjpeg.txt for more information.
|
| ++ */
|
| ++
|
| ++GLOBAL(JDIMENSION)
|
| ++jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines)
|
| ++{
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| ++ my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
|
| ++ my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
|
| ++ JDIMENSION i, x;
|
| ++ int y;
|
| ++ JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
|
| ++ JDIMENSION lines_to_skip, lines_to_read;
|
| ++
|
| ++ if (cinfo->global_state != DSTATE_SCANNING)
|
| ++ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
| ++
|
| ++ /* Do not skip past the bottom of the image. */
|
| ++ if (cinfo->output_scanline + num_lines >= cinfo->output_height) {
|
| ++ cinfo->output_scanline = cinfo->output_height;
|
| ++ return cinfo->output_height - cinfo->output_scanline;
|
| ++ }
|
| ++
|
| ++ if (num_lines == 0)
|
| ++ return 0;
|
| ++
|
| ++ lines_per_iMCU_row = cinfo->_min_DCT_scaled_size * cinfo->max_v_samp_factor;
|
| ++ lines_left_in_iMCU_row =
|
| ++ (lines_per_iMCU_row - (cinfo->output_scanline % lines_per_iMCU_row)) %
|
| ++ lines_per_iMCU_row;
|
| ++ lines_after_iMCU_row = num_lines - lines_left_in_iMCU_row;
|
| ++
|
| ++ /* Skip the lines remaining in the current iMCU row. When upsampling
|
| ++ * requires context rows, we need the previous and next rows in order to read
|
| ++ * the current row. This adds some complexity.
|
| ++ */
|
| ++ if (cinfo->upsample->need_context_rows) {
|
| ++ /* If the skipped lines would not move us past the current iMCU row, we
|
| ++ * read the lines and ignore them. There might be a faster way of doing
|
| ++ * this, but we are facing increasing complexity for diminishing returns.
|
| ++ * The increasing complexity would be a by-product of meddling with the
|
| ++ * state machine used to skip context rows. Near the end of an iMCU row,
|
| ++ * the next iMCU row may have already been entropy-decoded. In this unique
|
| ++ * case, we will read the next iMCU row if we cannot skip past it as well.
|
| ++ */
|
| ++ if ((num_lines < lines_left_in_iMCU_row + 1) ||
|
| ++ (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full &&
|
| ++ lines_after_iMCU_row < lines_per_iMCU_row + 1)) {
|
| ++ read_and_discard_scanlines(cinfo, num_lines);
|
| ++ return num_lines;
|
| ++ }
|
| ++
|
| ++ /* If the next iMCU row has already been entropy-decoded, make sure that
|
| ++ * we do not skip too far.
|
| ++ */
|
| ++ if (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full) {
|
| ++ cinfo->output_scanline += lines_left_in_iMCU_row + lines_per_iMCU_row;
|
| ++ lines_after_iMCU_row -= lines_per_iMCU_row;
|
| ++ } else {
|
| ++ cinfo->output_scanline += lines_left_in_iMCU_row;
|
| ++ }
|
| ++
|
| ++ /* If we have just completed the first block, adjust the buffer pointers */
|
| ++ if (main_ptr->iMCU_row_ctr == 0 ||
|
| ++ (main_ptr->iMCU_row_ctr == 1 && lines_left_in_iMCU_row > 2))
|
| ++ set_wraparound_pointers(cinfo);
|
| ++ main_ptr->buffer_full = FALSE;
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| ++ main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
|
| ++ upsample->next_row_out = cinfo->max_v_samp_factor;
|
| ++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
|
| ++ }
|
| ++
|
| ++ /* Skipping is much simpler when context rows are not required. */
|
| ++ else {
|
| ++ if (num_lines < lines_left_in_iMCU_row) {
|
| ++ increment_simple_rowgroup_ctr(cinfo, num_lines);
|
| ++ return num_lines;
|
| ++ } else {
|
| ++ cinfo->output_scanline += lines_left_in_iMCU_row;
|
| ++ main_ptr->buffer_full = FALSE;
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| ++ upsample->next_row_out = cinfo->max_v_samp_factor;
|
| ++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
|
| ++ }
|
| ++ }
|
| ++
|
| ++ /* Calculate how many full iMCU rows we can skip. */
|
| ++ if (cinfo->upsample->need_context_rows)
|
| ++ lines_to_skip = ((lines_after_iMCU_row - 1) / lines_per_iMCU_row) *
|
| ++ lines_per_iMCU_row;
|
| ++ else
|
| ++ lines_to_skip = (lines_after_iMCU_row / lines_per_iMCU_row) *
|
| ++ lines_per_iMCU_row;
|
| ++ /* Calculate the number of lines that remain to be skipped after skipping all
|
| ++ * of the full iMCU rows that we can. We will not read these lines unless we
|
| ++ * have to.
|
| ++ */
|
| ++ lines_to_read = lines_after_iMCU_row - lines_to_skip;
|
| ++
|
| ++ /* For images requiring multiple scans (progressive, non-interleaved, etc.),
|
| ++ * all of the entropy decoding occurs in jpeg_start_decompress(), assuming
|
| ++ * that the input data source is non-suspending. This makes skipping easy.
|
| ++ */
|
| ++ if (cinfo->inputctl->has_multiple_scans) {
|
| ++ if (cinfo->upsample->need_context_rows) {
|
| ++ cinfo->output_scanline += lines_to_skip;
|
| ++ cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
|
| ++ main_ptr->iMCU_row_ctr += lines_after_iMCU_row / lines_per_iMCU_row;
|
| ++ /* It is complex to properly move to the middle of a context block, so
|
| ++ * read the remaining lines instead of skipping them.
|
| ++ */
|
| ++ read_and_discard_scanlines(cinfo, lines_to_read);
|
| ++ } else {
|
| ++ cinfo->output_scanline += lines_to_skip;
|
| ++ cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
|
| ++ increment_simple_rowgroup_ctr(cinfo, lines_to_read);
|
| ++ }
|
| ++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
|
| ++ return num_lines;
|
| ++ }
|
| ++
|
| ++ /* Skip the iMCU rows that we can safely skip. */
|
| ++ for (i = 0; i < lines_to_skip; i += lines_per_iMCU_row) {
|
| ++ for (y = 0; y < coef->MCU_rows_per_iMCU_row; y++) {
|
| ++ for (x = 0; x < cinfo->MCUs_per_row; x++) {
|
| ++ /* Calling decode_mcu() with a NULL pointer causes it to discard the
|
| ++ * decoded coefficients. This is ~5% faster for large subsets, but
|
| ++ * it's tough to tell a difference for smaller images.
|
| ++ */
|
| ++ (*cinfo->entropy->decode_mcu) (cinfo, NULL);
|
| ++ }
|
| ++ }
|
| ++ cinfo->input_iMCU_row++;
|
| ++ cinfo->output_iMCU_row++;
|
| ++ if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows)
|
| ++ start_iMCU_row(cinfo);
|
| ++ else
|
| ++ (*cinfo->inputctl->finish_input_pass) (cinfo);
|
| ++ }
|
| ++ cinfo->output_scanline += lines_to_skip;
|
| ++
|
| ++ if (cinfo->upsample->need_context_rows) {
|
| ++ /* Context-based upsampling keeps track of iMCU rows. */
|
| ++ main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row;
|
| ++
|
| ++ /* It is complex to properly move to the middle of a context block, so
|
| ++ * read the remaining lines instead of skipping them.
|
| ++ */
|
| ++ read_and_discard_scanlines(cinfo, lines_to_read);
|
| ++ } else {
|
| ++ increment_simple_rowgroup_ctr(cinfo, lines_to_read);
|
| ++ }
|
| ++
|
| ++ /* Since skipping lines involves skipping the upsampling step, the value of
|
| ++ * "rows_to_go" will become invalid unless we set it here. NOTE: This is a
|
| ++ * bit odd, since "rows_to_go" seems to be redundantly keeping track of
|
| ++ * output_scanline.
|
| ++ */
|
| ++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
|
| ++
|
| ++ /* Always skip the requested number of lines. */
|
| ++ return num_lines;
|
| ++}
|
| ++
|
| ++/*
|
| + * Alternate entry point to read raw data.
|
| + * Processes exactly one iMCU row per call, unless suspended.
|
| + */
|
| +@@ -202,7 +435,7 @@
|
| + }
|
| +
|
| + /* Verify that at least one iMCU row can be returned. */
|
| +- lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
|
| ++ lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size;
|
| + if (max_lines < lines_per_iMCU_row)
|
| + ERREXIT(cinfo, JERR_BUFFER_SIZE);
|
| +
|
| +Index: jdatadst.c
|
| +===================================================================
|
| +--- jdatadst.c (revision 829)
|
| ++++ jdatadst.c (working copy)
|
| +@@ -1,14 +1,17 @@
|
| + /*
|
| + * jdatadst.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2009-2012 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2013, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains compression data destination routines for the case of
|
| +- * emitting JPEG data to a file (or any stdio stream). While these routines
|
| +- * are sufficient for most applications, some will want to use a different
|
| +- * destination manager.
|
| ++ * emitting JPEG data to memory or to a file (or any stdio stream).
|
| ++ * While these routines are sufficient for most applications,
|
| ++ * some will want to use a different destination manager.
|
| + * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
|
| + * JOCTETs into 8-bit-wide elements on external storage. If char is wider
|
| + * than 8 bits on your machine, you may need to do some tweaking.
|
| +@@ -19,7 +22,12 @@
|
| + #include "jpeglib.h"
|
| + #include "jerror.h"
|
| +
|
| ++#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare malloc(),free() */
|
| ++extern void * malloc JPP((size_t size));
|
| ++extern void free JPP((void *ptr));
|
| ++#endif
|
| +
|
| ++
|
| + /* Expanded data destination object for stdio output */
|
| +
|
| + typedef struct {
|
| +@@ -34,6 +42,23 @@
|
| + #define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */
|
| +
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++/* Expanded data destination object for memory output */
|
| ++
|
| ++typedef struct {
|
| ++ struct jpeg_destination_mgr pub; /* public fields */
|
| ++
|
| ++ unsigned char ** outbuffer; /* target buffer */
|
| ++ unsigned long * outsize;
|
| ++ unsigned char * newbuffer; /* newly allocated buffer */
|
| ++ JOCTET * buffer; /* start of buffer */
|
| ++ size_t bufsize;
|
| ++} my_mem_destination_mgr;
|
| ++
|
| ++typedef my_mem_destination_mgr * my_mem_dest_ptr;
|
| ++#endif
|
| ++
|
| ++
|
| + /*
|
| + * Initialize destination --- called by jpeg_start_compress
|
| + * before any data is actually written.
|
| +@@ -53,7 +78,15 @@
|
| + dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++METHODDEF(void)
|
| ++init_mem_destination (j_compress_ptr cinfo)
|
| ++{
|
| ++ /* no work necessary here */
|
| ++}
|
| ++#endif
|
| +
|
| ++
|
| + /*
|
| + * Empty the output buffer --- called whenever buffer fills up.
|
| + *
|
| +@@ -92,7 +125,39 @@
|
| + return TRUE;
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++METHODDEF(boolean)
|
| ++empty_mem_output_buffer (j_compress_ptr cinfo)
|
| ++{
|
| ++ size_t nextsize;
|
| ++ JOCTET * nextbuffer;
|
| ++ my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
|
| +
|
| ++ /* Try to allocate new buffer with double size */
|
| ++ nextsize = dest->bufsize * 2;
|
| ++ nextbuffer = (JOCTET *) malloc(nextsize);
|
| ++
|
| ++ if (nextbuffer == NULL)
|
| ++ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
|
| ++
|
| ++ MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
|
| ++
|
| ++ if (dest->newbuffer != NULL)
|
| ++ free(dest->newbuffer);
|
| ++
|
| ++ dest->newbuffer = nextbuffer;
|
| ++
|
| ++ dest->pub.next_output_byte = nextbuffer + dest->bufsize;
|
| ++ dest->pub.free_in_buffer = dest->bufsize;
|
| ++
|
| ++ dest->buffer = nextbuffer;
|
| ++ dest->bufsize = nextsize;
|
| ++
|
| ++ return TRUE;
|
| ++}
|
| ++#endif
|
| ++
|
| ++
|
| + /*
|
| + * Terminate destination --- called by jpeg_finish_compress
|
| + * after all data has been written. Usually needs to flush buffer.
|
| +@@ -119,7 +184,18 @@
|
| + ERREXIT(cinfo, JERR_FILE_WRITE);
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++METHODDEF(void)
|
| ++term_mem_destination (j_compress_ptr cinfo)
|
| ++{
|
| ++ my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
|
| +
|
| ++ *dest->outbuffer = dest->buffer;
|
| ++ *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer);
|
| ++}
|
| ++#endif
|
| ++
|
| ++
|
| + /*
|
| + * Prepare for output to a stdio stream.
|
| + * The caller must have already opened the stream, and is responsible
|
| +@@ -149,3 +225,55 @@
|
| + dest->pub.term_destination = term_destination;
|
| + dest->outfile = outfile;
|
| + }
|
| ++
|
| ++
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++/*
|
| ++ * Prepare for output to a memory buffer.
|
| ++ * The caller may supply an own initial buffer with appropriate size.
|
| ++ * Otherwise, or when the actual data output exceeds the given size,
|
| ++ * the library adapts the buffer size as necessary.
|
| ++ * The standard library functions malloc/free are used for allocating
|
| ++ * larger memory, so the buffer is available to the application after
|
| ++ * finishing compression, and then the application is responsible for
|
| ++ * freeing the requested memory.
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_mem_dest (j_compress_ptr cinfo,
|
| ++ unsigned char ** outbuffer, unsigned long * outsize)
|
| ++{
|
| ++ my_mem_dest_ptr dest;
|
| ++
|
| ++ if (outbuffer == NULL || outsize == NULL) /* sanity check */
|
| ++ ERREXIT(cinfo, JERR_BUFFER_SIZE);
|
| ++
|
| ++ /* The destination object is made permanent so that multiple JPEG images
|
| ++ * can be written to the same buffer without re-executing jpeg_mem_dest.
|
| ++ */
|
| ++ if (cinfo->dest == NULL) { /* first time for this JPEG object? */
|
| ++ cinfo->dest = (struct jpeg_destination_mgr *)
|
| ++ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
|
| ++ SIZEOF(my_mem_destination_mgr));
|
| ++ }
|
| ++
|
| ++ dest = (my_mem_dest_ptr) cinfo->dest;
|
| ++ dest->pub.init_destination = init_mem_destination;
|
| ++ dest->pub.empty_output_buffer = empty_mem_output_buffer;
|
| ++ dest->pub.term_destination = term_mem_destination;
|
| ++ dest->outbuffer = outbuffer;
|
| ++ dest->outsize = outsize;
|
| ++ dest->newbuffer = NULL;
|
| ++
|
| ++ if (*outbuffer == NULL || *outsize == 0) {
|
| ++ /* Allocate initial buffer */
|
| ++ dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
|
| ++ if (dest->newbuffer == NULL)
|
| ++ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
|
| ++ *outsize = OUTPUT_BUF_SIZE;
|
| ++ }
|
| ++
|
| ++ dest->pub.next_output_byte = dest->buffer = *outbuffer;
|
| ++ dest->pub.free_in_buffer = dest->bufsize = *outsize;
|
| ++}
|
| ++#endif
|
| +Index: jdatasrc.c
|
| +===================================================================
|
| +--- jdatasrc.c (revision 829)
|
| ++++ jdatasrc.c (working copy)
|
| +@@ -1,14 +1,17 @@
|
| + /*
|
| + * jdatasrc.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2009-2011 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2013, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains decompression data source routines for the case of
|
| +- * reading JPEG data from a file (or any stdio stream). While these routines
|
| +- * are sufficient for most applications, some will want to use a different
|
| +- * source manager.
|
| ++ * reading JPEG data from memory or from a file (or any stdio stream).
|
| ++ * While these routines are sufficient for most applications,
|
| ++ * some will want to use a different source manager.
|
| + * IMPORTANT: we assume that fread() will correctly transcribe an array of
|
| + * JOCTETs from 8-bit-wide elements on external storage. If char is wider
|
| + * than 8 bits on your machine, you may need to do some tweaking.
|
| +@@ -52,7 +55,15 @@
|
| + src->start_of_file = TRUE;
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++METHODDEF(void)
|
| ++init_mem_source (j_decompress_ptr cinfo)
|
| ++{
|
| ++ /* no work necessary here */
|
| ++}
|
| ++#endif
|
| +
|
| ++
|
| + /*
|
| + * Fill the input buffer --- called whenever buffer is emptied.
|
| + *
|
| +@@ -111,7 +122,30 @@
|
| + return TRUE;
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++METHODDEF(boolean)
|
| ++fill_mem_input_buffer (j_decompress_ptr cinfo)
|
| ++{
|
| ++ static const JOCTET mybuffer[4] = {
|
| ++ (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
|
| ++ };
|
| +
|
| ++ /* The whole JPEG data is expected to reside in the supplied memory
|
| ++ * buffer, so any request for more data beyond the given buffer size
|
| ++ * is treated as an error (reported via a JWRN_JPEG_EOF warning).
|
| ++ */
|
| ++ WARNMS(cinfo, JWRN_JPEG_EOF);
|
| ++
|
| ++ /* Insert a fake EOI marker */
|
| ++
|
| ++ cinfo->src->next_input_byte = mybuffer;
|
| ++ cinfo->src->bytes_in_buffer = 2;
|
| ++
|
| ++ return TRUE;
|
| ++}
|
| ++#endif
|
| ++
|
| ++
|
| + /*
|
| + * Skip data --- used to skip over a potentially large amount of
|
| + * uninteresting data (such as an APPn marker).
|
| +@@ -127,7 +161,7 @@
|
| + METHODDEF(void)
|
| + skip_input_data (j_decompress_ptr cinfo, long num_bytes)
|
| + {
|
| +- my_src_ptr src = (my_src_ptr) cinfo->src;
|
| ++ struct jpeg_source_mgr * src = cinfo->src;
|
| +
|
| + /* Just a dumb implementation for now. Could use fseek() except
|
| + * it doesn't work on pipes. Not clear that being smart is worth
|
| +@@ -134,15 +168,15 @@
|
| + * any trouble anyway --- large skips are infrequent.
|
| + */
|
| + if (num_bytes > 0) {
|
| +- while (num_bytes > (long) src->pub.bytes_in_buffer) {
|
| +- num_bytes -= (long) src->pub.bytes_in_buffer;
|
| +- (void) fill_input_buffer(cinfo);
|
| ++ while (num_bytes > (long) src->bytes_in_buffer) {
|
| ++ num_bytes -= (long) src->bytes_in_buffer;
|
| ++ (void) (*src->fill_input_buffer) (cinfo);
|
| + /* note we assume that fill_input_buffer will never return FALSE,
|
| + * so suspension need not be handled.
|
| + */
|
| + }
|
| +- src->pub.next_input_byte += (size_t) num_bytes;
|
| +- src->pub.bytes_in_buffer -= (size_t) num_bytes;
|
| ++ src->next_input_byte += (size_t) num_bytes;
|
| ++ src->bytes_in_buffer -= (size_t) num_bytes;
|
| + }
|
| + }
|
| +
|
| +@@ -210,3 +244,40 @@
|
| + src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
|
| + src->pub.next_input_byte = NULL; /* until buffer loaded */
|
| + }
|
| ++
|
| ++
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++/*
|
| ++ * Prepare for input from a supplied memory buffer.
|
| ++ * The buffer must contain the whole JPEG data.
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_mem_src (j_decompress_ptr cinfo,
|
| ++ unsigned char * inbuffer, unsigned long insize)
|
| ++{
|
| ++ struct jpeg_source_mgr * src;
|
| ++
|
| ++ if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */
|
| ++ ERREXIT(cinfo, JERR_INPUT_EMPTY);
|
| ++
|
| ++ /* The source object is made permanent so that a series of JPEG images
|
| ++ * can be read from the same buffer by calling jpeg_mem_src only before
|
| ++ * the first one.
|
| ++ */
|
| ++ if (cinfo->src == NULL) { /* first time for this JPEG object? */
|
| ++ cinfo->src = (struct jpeg_source_mgr *)
|
| ++ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
|
| ++ SIZEOF(struct jpeg_source_mgr));
|
| ++ }
|
| ++
|
| ++ src = cinfo->src;
|
| ++ src->init_source = init_mem_source;
|
| ++ src->fill_input_buffer = fill_mem_input_buffer;
|
| ++ src->skip_input_data = skip_input_data;
|
| ++ src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
|
| ++ src->term_source = term_source;
|
| ++ src->bytes_in_buffer = (size_t) insize;
|
| ++ src->next_input_byte = (JOCTET *) inbuffer;
|
| ++}
|
| ++#endif
|
| +Index: jdcoefct.c
|
| +===================================================================
|
| +--- jdcoefct.c (revision 829)
|
| ++++ jdcoefct.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * jdcoefct.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains the coefficient buffer controller for decompression.
|
| +@@ -14,56 +17,10 @@
|
| + * Also, the input side (only) is used when reading a file for transcoding.
|
| + */
|
| +
|
| +-#define JPEG_INTERNALS
|
| +-#include "jinclude.h"
|
| +-#include "jpeglib.h"
|
| ++#include "jdcoefct.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +-/* Block smoothing is only applicable for progressive JPEG, so: */
|
| +-#ifndef D_PROGRESSIVE_SUPPORTED
|
| +-#undef BLOCK_SMOOTHING_SUPPORTED
|
| +-#endif
|
| +
|
| +-/* Private buffer controller object */
|
| +-
|
| +-typedef struct {
|
| +- struct jpeg_d_coef_controller pub; /* public fields */
|
| +-
|
| +- /* These variables keep track of the current location of the input side. */
|
| +- /* cinfo->input_iMCU_row is also used for this. */
|
| +- JDIMENSION MCU_ctr; /* counts MCUs processed in current row */
|
| +- int MCU_vert_offset; /* counts MCU rows within iMCU row */
|
| +- int MCU_rows_per_iMCU_row; /* number of such rows needed */
|
| +-
|
| +- /* The output side's location is represented by cinfo->output_iMCU_row. */
|
| +-
|
| +- /* In single-pass modes, it's sufficient to buffer just one MCU.
|
| +- * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
|
| +- * and let the entropy decoder write into that workspace each time.
|
| +- * (On 80x86, the workspace is FAR even though it's not really very big;
|
| +- * this is to keep the module interfaces unchanged when a large coefficient
|
| +- * buffer is necessary.)
|
| +- * In multi-pass modes, this array points to the current MCU's blocks
|
| +- * within the virtual arrays; it is used only by the input side.
|
| +- */
|
| +- JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
|
| +-
|
| +- /* Temporary workspace for one MCU */
|
| +- JCOEF * workspace;
|
| +-
|
| +-#ifdef D_MULTISCAN_FILES_SUPPORTED
|
| +- /* In multi-pass modes, we need a virtual block array for each component. */
|
| +- jvirt_barray_ptr whole_image[MAX_COMPONENTS];
|
| +-#endif
|
| +-
|
| +-#ifdef BLOCK_SMOOTHING_SUPPORTED
|
| +- /* When doing block smoothing, we latch coefficient Al values here */
|
| +- int * coef_bits_latch;
|
| +-#define SAVED_COEFS 6 /* we save coef_bits[0..5] */
|
| +-#endif
|
| +-} my_coef_controller;
|
| +-
|
| +-typedef my_coef_controller * my_coef_ptr;
|
| +-
|
| + /* Forward declarations */
|
| + METHODDEF(int) decompress_onepass
|
| + JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
|
| +@@ -78,30 +35,6 @@
|
| + #endif
|
| +
|
| +
|
| +-LOCAL(void)
|
| +-start_iMCU_row (j_decompress_ptr cinfo)
|
| +-/* Reset within-iMCU-row counters for a new row (input side) */
|
| +-{
|
| +- my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
|
| +-
|
| +- /* In an interleaved scan, an MCU row is the same as an iMCU row.
|
| +- * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
|
| +- * But at the bottom of the image, process only what's left.
|
| +- */
|
| +- if (cinfo->comps_in_scan > 1) {
|
| +- coef->MCU_rows_per_iMCU_row = 1;
|
| +- } else {
|
| +- if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
|
| +- coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
|
| +- else
|
| +- coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
|
| +- }
|
| +-
|
| +- coef->MCU_ctr = 0;
|
| +- coef->MCU_vert_offset = 0;
|
| +-}
|
| +-
|
| +-
|
| + /*
|
| + * Initialize for an input processing pass.
|
| + */
|
| +@@ -190,7 +123,7 @@
|
| + useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
|
| + : compptr->last_col_width;
|
| + output_ptr = output_buf[compptr->component_index] +
|
| +- yoffset * compptr->DCT_scaled_size;
|
| ++ yoffset * compptr->_DCT_scaled_size;
|
| + start_col = MCU_col_num * compptr->MCU_sample_width;
|
| + for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
|
| + if (cinfo->input_iMCU_row < last_iMCU_row ||
|
| +@@ -200,11 +133,11 @@
|
| + (*inverse_DCT) (cinfo, compptr,
|
| + (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
|
| + output_ptr, output_col);
|
| +- output_col += compptr->DCT_scaled_size;
|
| ++ output_col += compptr->_DCT_scaled_size;
|
| + }
|
| + }
|
| + blkn += compptr->MCU_width;
|
| +- output_ptr += compptr->DCT_scaled_size;
|
| ++ output_ptr += compptr->_DCT_scaled_size;
|
| + }
|
| + }
|
| + }
|
| +@@ -365,9 +298,9 @@
|
| + (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
|
| + output_ptr, output_col);
|
| + buffer_ptr++;
|
| +- output_col += compptr->DCT_scaled_size;
|
| ++ output_col += compptr->_DCT_scaled_size;
|
| + }
|
| +- output_ptr += compptr->DCT_scaled_size;
|
| ++ output_ptr += compptr->_DCT_scaled_size;
|
| + }
|
| + }
|
| +
|
| +@@ -660,9 +593,9 @@
|
| + DC4 = DC5; DC5 = DC6;
|
| + DC7 = DC8; DC8 = DC9;
|
| + buffer_ptr++, prev_block_row++, next_block_row++;
|
| +- output_col += compptr->DCT_scaled_size;
|
| ++ output_col += compptr->_DCT_scaled_size;
|
| + }
|
| +- output_ptr += compptr->DCT_scaled_size;
|
| ++ output_ptr += compptr->_DCT_scaled_size;
|
| + }
|
| + }
|
| +
|
| +Index: jdcolor.c
|
| +===================================================================
|
| +--- jdcolor.c (revision 829)
|
| ++++ jdcolor.c (working copy)
|
| +@@ -1,10 +1,12 @@
|
| + /*
|
| + * jdcolor.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| ++ * Modified 2011 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Copyright (C) 2009, 2011-2012, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains output colorspace conversion routines.
|
| +@@ -14,6 +16,7 @@
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| + #include "jsimd.h"
|
| ++#include "config.h"
|
| +
|
| +
|
| + /* Private subobject */
|
| +@@ -26,6 +29,9 @@
|
| + int * Cb_b_tab; /* => table for Cb to B conversion */
|
| + INT32 * Cr_g_tab; /* => table for Cr to G conversion */
|
| + INT32 * Cb_g_tab; /* => table for Cb to G conversion */
|
| ++
|
| ++ /* Private state for RGB->Y conversion */
|
| ++ INT32 * rgb_y_tab; /* => table for RGB to Y conversion */
|
| + } my_color_deconverter;
|
| +
|
| + typedef my_color_deconverter * my_cconvert_ptr;
|
| +@@ -32,14 +38,19 @@
|
| +
|
| +
|
| + /**************** YCbCr -> RGB conversion: most common case **************/
|
| ++/**************** RGB -> Y conversion: less common case **************/
|
| +
|
| + /*
|
| + * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
|
| + * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
|
| + * The conversion equations to be implemented are therefore
|
| ++ *
|
| + * R = Y + 1.40200 * Cr
|
| + * G = Y - 0.34414 * Cb - 0.71414 * Cr
|
| + * B = Y + 1.77200 * Cb
|
| ++ *
|
| ++ * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
| ++ *
|
| + * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
|
| + * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
|
| + *
|
| +@@ -64,7 +75,132 @@
|
| + #define ONE_HALF ((INT32) 1 << (SCALEBITS-1))
|
| + #define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
|
| +
|
| ++/* We allocate one big table for RGB->Y conversion and divide it up into
|
| ++ * three parts, instead of doing three alloc_small requests. This lets us
|
| ++ * use a single table base address, which can be held in a register in the
|
| ++ * inner loops on many machines (more than can hold all three addresses,
|
| ++ * anyway).
|
| ++ */
|
| +
|
| ++#define R_Y_OFF 0 /* offset to R => Y section */
|
| ++#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */
|
| ++#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */
|
| ++#define TABLE_SIZE (3*(MAXJSAMPLE+1))
|
| ++
|
| ++
|
| ++/* Include inline routines for colorspace extensions */
|
| ++
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++
|
| ++#define RGB_RED EXT_RGB_RED
|
| ++#define RGB_GREEN EXT_RGB_GREEN
|
| ++#define RGB_BLUE EXT_RGB_BLUE
|
| ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| ++#define ycc_rgb_convert_internal ycc_extrgb_convert_internal
|
| ++#define gray_rgb_convert_internal gray_extrgb_convert_internal
|
| ++#define rgb_rgb_convert_internal rgb_extrgb_convert_internal
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef ycc_rgb_convert_internal
|
| ++#undef gray_rgb_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_RGBX_RED
|
| ++#define RGB_GREEN EXT_RGBX_GREEN
|
| ++#define RGB_BLUE EXT_RGBX_BLUE
|
| ++#define RGB_ALPHA 3
|
| ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| ++#define ycc_rgb_convert_internal ycc_extrgbx_convert_internal
|
| ++#define gray_rgb_convert_internal gray_extrgbx_convert_internal
|
| ++#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef ycc_rgb_convert_internal
|
| ++#undef gray_rgb_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_BGR_RED
|
| ++#define RGB_GREEN EXT_BGR_GREEN
|
| ++#define RGB_BLUE EXT_BGR_BLUE
|
| ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| ++#define ycc_rgb_convert_internal ycc_extbgr_convert_internal
|
| ++#define gray_rgb_convert_internal gray_extbgr_convert_internal
|
| ++#define rgb_rgb_convert_internal rgb_extbgr_convert_internal
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef ycc_rgb_convert_internal
|
| ++#undef gray_rgb_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_BGRX_RED
|
| ++#define RGB_GREEN EXT_BGRX_GREEN
|
| ++#define RGB_BLUE EXT_BGRX_BLUE
|
| ++#define RGB_ALPHA 3
|
| ++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| ++#define ycc_rgb_convert_internal ycc_extbgrx_convert_internal
|
| ++#define gray_rgb_convert_internal gray_extbgrx_convert_internal
|
| ++#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef ycc_rgb_convert_internal
|
| ++#undef gray_rgb_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_XBGR_RED
|
| ++#define RGB_GREEN EXT_XBGR_GREEN
|
| ++#define RGB_BLUE EXT_XBGR_BLUE
|
| ++#define RGB_ALPHA 0
|
| ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| ++#define ycc_rgb_convert_internal ycc_extxbgr_convert_internal
|
| ++#define gray_rgb_convert_internal gray_extxbgr_convert_internal
|
| ++#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef ycc_rgb_convert_internal
|
| ++#undef gray_rgb_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++#define RGB_RED EXT_XRGB_RED
|
| ++#define RGB_GREEN EXT_XRGB_GREEN
|
| ++#define RGB_BLUE EXT_XRGB_BLUE
|
| ++#define RGB_ALPHA 0
|
| ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| ++#define ycc_rgb_convert_internal ycc_extxrgb_convert_internal
|
| ++#define gray_rgb_convert_internal gray_extxrgb_convert_internal
|
| ++#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal
|
| ++#include "jdcolext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef ycc_rgb_convert_internal
|
| ++#undef gray_rgb_convert_internal
|
| ++#undef rgb_rgb_convert_internal
|
| ++
|
| ++
|
| + /*
|
| + * Initialize tables for YCC->RGB colorspace conversion.
|
| + */
|
| +@@ -110,13 +246,6 @@
|
| +
|
| + /*
|
| + * Convert some rows of samples to the output colorspace.
|
| +- *
|
| +- * Note that we change from noninterleaved, one-plane-per-component format
|
| +- * to interleaved-pixel format. The output buffer is therefore three times
|
| +- * as wide as the input buffer.
|
| +- * A starting row offset is provided only for the input buffer. The caller
|
| +- * can easily adjust the passed output_buf value to accommodate any row
|
| +- * offset required on that side.
|
| + */
|
| +
|
| + METHODDEF(void)
|
| +@@ -124,19 +253,86 @@
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows)
|
| + {
|
| ++ switch (cinfo->out_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ default:
|
| ++ ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/**************** Cases other than YCbCr -> RGB **************/
|
| ++
|
| ++
|
| ++/*
|
| ++ * Initialize for RGB->grayscale colorspace conversion.
|
| ++ */
|
| ++
|
| ++LOCAL(void)
|
| ++build_rgb_y_table (j_decompress_ptr cinfo)
|
| ++{
|
| + my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
|
| +- register int y, cb, cr;
|
| ++ INT32 * rgb_y_tab;
|
| ++ INT32 i;
|
| ++
|
| ++ /* Allocate and fill in the conversion tables. */
|
| ++ cconvert->rgb_y_tab = rgb_y_tab = (INT32 *)
|
| ++ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| ++ (TABLE_SIZE * SIZEOF(INT32)));
|
| ++
|
| ++ for (i = 0; i <= MAXJSAMPLE; i++) {
|
| ++ rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
|
| ++ rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
|
| ++ rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Convert RGB to grayscale.
|
| ++ */
|
| ++
|
| ++METHODDEF(void)
|
| ++rgb_gray_convert (j_decompress_ptr cinfo,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows)
|
| ++{
|
| ++ my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
|
| ++ register int r, g, b;
|
| ++ register INT32 * ctab = cconvert->rgb_y_tab;
|
| + register JSAMPROW outptr;
|
| + register JSAMPROW inptr0, inptr1, inptr2;
|
| + register JDIMENSION col;
|
| + JDIMENSION num_cols = cinfo->output_width;
|
| +- /* copy these pointers into registers if possible */
|
| +- register JSAMPLE * range_limit = cinfo->sample_range_limit;
|
| +- register int * Crrtab = cconvert->Cr_r_tab;
|
| +- register int * Cbbtab = cconvert->Cb_b_tab;
|
| +- register INT32 * Crgtab = cconvert->Cr_g_tab;
|
| +- register INT32 * Cbgtab = cconvert->Cb_g_tab;
|
| +- SHIFT_TEMPS
|
| +
|
| + while (--num_rows >= 0) {
|
| + inptr0 = input_buf[0][input_row];
|
| +@@ -145,24 +341,18 @@
|
| + input_row++;
|
| + outptr = *output_buf++;
|
| + for (col = 0; col < num_cols; col++) {
|
| +- y = GETJSAMPLE(inptr0[col]);
|
| +- cb = GETJSAMPLE(inptr1[col]);
|
| +- cr = GETJSAMPLE(inptr2[col]);
|
| +- /* Range-limiting is essential due to noise introduced by DCT losses. */
|
| +- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + Crrtab[cr]];
|
| +- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y +
|
| +- ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
| +- SCALEBITS))];
|
| +- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + Cbbtab[cb]];
|
| +- outptr += rgb_pixelsize[cinfo->out_color_space];
|
| ++ r = GETJSAMPLE(inptr0[col]);
|
| ++ g = GETJSAMPLE(inptr1[col]);
|
| ++ b = GETJSAMPLE(inptr2[col]);
|
| ++ /* Y */
|
| ++ outptr[col] = (JSAMPLE)
|
| ++ ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
|
| ++ >> SCALEBITS);
|
| + }
|
| + }
|
| + }
|
| +
|
| +
|
| +-/**************** Cases other than YCbCr -> RGB **************/
|
| +-
|
| +-
|
| + /*
|
| + * Color conversion for no colorspace change: just copy the data,
|
| + * converting from separate-planes to interleaved representation.
|
| +@@ -211,9 +401,7 @@
|
| +
|
| +
|
| + /*
|
| +- * Convert grayscale to RGB: just duplicate the graylevel three times.
|
| +- * This is provided to support applications that don't want to cope
|
| +- * with grayscale as a separate case.
|
| ++ * Convert grayscale to RGB
|
| + */
|
| +
|
| + METHODDEF(void)
|
| +@@ -221,20 +409,85 @@
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows)
|
| + {
|
| +- register JSAMPROW inptr, outptr;
|
| +- register JDIMENSION col;
|
| +- JDIMENSION num_cols = cinfo->output_width;
|
| ++ switch (cinfo->out_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ default:
|
| ++ gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ }
|
| ++}
|
| +
|
| +- while (--num_rows >= 0) {
|
| +- inptr = input_buf[0][input_row++];
|
| +- outptr = *output_buf++;
|
| +- for (col = 0; col < num_cols; col++) {
|
| +- /* We can dispense with GETJSAMPLE() here */
|
| +- outptr[rgb_red[cinfo->out_color_space]] =
|
| +- outptr[rgb_green[cinfo->out_color_space]] =
|
| +- outptr[rgb_blue[cinfo->out_color_space]] = inptr[col];
|
| +- outptr += rgb_pixelsize[cinfo->out_color_space];
|
| +- }
|
| ++
|
| ++/*
|
| ++ * Convert plain RGB to extended RGB
|
| ++ */
|
| ++
|
| ++METHODDEF(void)
|
| ++rgb_rgb_convert (j_decompress_ptr cinfo,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows)
|
| ++{
|
| ++ switch (cinfo->out_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| ++ default:
|
| ++ rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
| ++ num_rows);
|
| ++ break;
|
| + }
|
| + }
|
| +
|
| +@@ -356,6 +609,9 @@
|
| + /* For color->grayscale conversion, only the Y (0) component is needed */
|
| + for (ci = 1; ci < cinfo->num_components; ci++)
|
| + cinfo->comp_info[ci].component_needed = FALSE;
|
| ++ } else if (cinfo->jpeg_color_space == JCS_RGB) {
|
| ++ cconvert->pub.color_convert = rgb_gray_convert;
|
| ++ build_rgb_y_table(cinfo);
|
| + } else
|
| + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
| + break;
|
| +@@ -367,6 +623,10 @@
|
| + case JCS_EXT_BGRX:
|
| + case JCS_EXT_XBGR:
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_RGBA:
|
| ++ case JCS_EXT_BGRA:
|
| ++ case JCS_EXT_ABGR:
|
| ++ case JCS_EXT_ARGB:
|
| + cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
|
| + if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
| + if (jsimd_can_ycc_rgb())
|
| +@@ -377,9 +637,14 @@
|
| + }
|
| + } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
| + cconvert->pub.color_convert = gray_rgb_convert;
|
| +- } else if (cinfo->jpeg_color_space == cinfo->out_color_space &&
|
| +- rgb_pixelsize[cinfo->out_color_space] == 3) {
|
| +- cconvert->pub.color_convert = null_convert;
|
| ++ } else if (cinfo->jpeg_color_space == JCS_RGB) {
|
| ++ if (rgb_red[cinfo->out_color_space] == 0 &&
|
| ++ rgb_green[cinfo->out_color_space] == 1 &&
|
| ++ rgb_blue[cinfo->out_color_space] == 2 &&
|
| ++ rgb_pixelsize[cinfo->out_color_space] == 3)
|
| ++ cconvert->pub.color_convert = null_convert;
|
| ++ else
|
| ++ cconvert->pub.color_convert = rgb_rgb_convert;
|
| + } else
|
| + ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
| + break;
|
| +Index: jdct.h
|
| +===================================================================
|
| +--- jdct.h (revision 829)
|
| ++++ jdct.h (working copy)
|
| +@@ -95,9 +95,21 @@
|
| + #define jpeg_idct_islow jRDislow
|
| + #define jpeg_idct_ifast jRDifast
|
| + #define jpeg_idct_float jRDfloat
|
| ++#define jpeg_idct_7x7 jRD7x7
|
| ++#define jpeg_idct_6x6 jRD6x6
|
| ++#define jpeg_idct_5x5 jRD5x5
|
| + #define jpeg_idct_4x4 jRD4x4
|
| ++#define jpeg_idct_3x3 jRD3x3
|
| + #define jpeg_idct_2x2 jRD2x2
|
| + #define jpeg_idct_1x1 jRD1x1
|
| ++#define jpeg_idct_9x9 jRD9x9
|
| ++#define jpeg_idct_10x10 jRD10x10
|
| ++#define jpeg_idct_11x11 jRD11x11
|
| ++#define jpeg_idct_12x12 jRD12x12
|
| ++#define jpeg_idct_13x13 jRD13x13
|
| ++#define jpeg_idct_14x14 jRD14x14
|
| ++#define jpeg_idct_15x15 jRD15x15
|
| ++#define jpeg_idct_16x16 jRD16x16
|
| + #endif /* NEED_SHORT_EXTERNAL_NAMES */
|
| +
|
| + /* Extern declarations for the forward and inverse DCT routines. */
|
| +@@ -115,9 +127,21 @@
|
| + EXTERN(void) jpeg_idct_float
|
| + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_7x7
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_6x6
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_5x5
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| + EXTERN(void) jpeg_idct_4x4
|
| + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_3x3
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| + EXTERN(void) jpeg_idct_2x2
|
| + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| +@@ -124,6 +148,30 @@
|
| + EXTERN(void) jpeg_idct_1x1
|
| + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_9x9
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_10x10
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_11x11
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_12x12
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_13x13
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_14x14
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_15x15
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| ++EXTERN(void) jpeg_idct_16x16
|
| ++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
|
| +
|
| +
|
| + /*
|
| +Index: jddctmgr.c
|
| +===================================================================
|
| +--- jddctmgr.c (revision 829)
|
| ++++ jddctmgr.c (working copy)
|
| +@@ -1,9 +1,12 @@
|
| + /*
|
| + * jddctmgr.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| ++ * Modified 2002-2010 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains the inverse-DCT management logic.
|
| +@@ -21,6 +24,7 @@
|
| + #include "jpeglib.h"
|
| + #include "jdct.h" /* Private declarations for DCT subsystem */
|
| + #include "jsimddct.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| + /*
|
| +@@ -100,7 +104,7 @@
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| + /* Select the proper IDCT routine for this component's scaling */
|
| +- switch (compptr->DCT_scaled_size) {
|
| ++ switch (compptr->_DCT_scaled_size) {
|
| + #ifdef IDCT_SCALING_SUPPORTED
|
| + case 1:
|
| + method_ptr = jpeg_idct_1x1;
|
| +@@ -113,6 +117,10 @@
|
| + method_ptr = jpeg_idct_2x2;
|
| + method = JDCT_ISLOW; /* jidctred uses islow-style table */
|
| + break;
|
| ++ case 3:
|
| ++ method_ptr = jpeg_idct_3x3;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| + case 4:
|
| + if (jsimd_can_idct_4x4())
|
| + method_ptr = jsimd_idct_4x4;
|
| +@@ -120,6 +128,18 @@
|
| + method_ptr = jpeg_idct_4x4;
|
| + method = JDCT_ISLOW; /* jidctred uses islow-style table */
|
| + break;
|
| ++ case 5:
|
| ++ method_ptr = jpeg_idct_5x5;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 6:
|
| ++ method_ptr = jpeg_idct_6x6;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 7:
|
| ++ method_ptr = jpeg_idct_7x7;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| + #endif
|
| + case DCTSIZE:
|
| + switch (cinfo->dct_method) {
|
| +@@ -155,8 +175,40 @@
|
| + break;
|
| + }
|
| + break;
|
| ++ case 9:
|
| ++ method_ptr = jpeg_idct_9x9;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 10:
|
| ++ method_ptr = jpeg_idct_10x10;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 11:
|
| ++ method_ptr = jpeg_idct_11x11;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 12:
|
| ++ method_ptr = jpeg_idct_12x12;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 13:
|
| ++ method_ptr = jpeg_idct_13x13;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 14:
|
| ++ method_ptr = jpeg_idct_14x14;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 15:
|
| ++ method_ptr = jpeg_idct_15x15;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| ++ case 16:
|
| ++ method_ptr = jpeg_idct_16x16;
|
| ++ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
| ++ break;
|
| + default:
|
| +- ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
|
| ++ ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
|
| + break;
|
| + }
|
| + idct->pub.inverse_DCT[ci] = method_ptr;
|
| +Index: jdhuff.c
|
| +===================================================================
|
| +--- jdhuff.c (revision 829)
|
| ++++ jdhuff.c (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jdhuff.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2009-2011, 2015, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains Huffman entropy decoding routines.
|
| +@@ -18,6 +20,7 @@
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| + #include "jdhuff.h" /* Declarations shared with jdphuff.c */
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| + /*
|
| +@@ -122,7 +125,7 @@
|
| + if (compptr->component_needed) {
|
| + entropy->dc_needed[blkn] = TRUE;
|
| + /* we don't need the ACs if producing a 1/8th-size image */
|
| +- entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1);
|
| ++ entropy->ac_needed[blkn] = (compptr->_DCT_scaled_size > 1);
|
| + } else {
|
| + entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
|
| + }
|
| +@@ -225,6 +228,7 @@
|
| + dtbl->maxcode[l] = -1; /* -1 if no codes of this length */
|
| + }
|
| + }
|
| ++ dtbl->valoffset[17] = 0;
|
| + dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
|
| +
|
| + /* Compute lookahead tables to speed up decoding.
|
| +@@ -234,7 +238,8 @@
|
| + * with that code.
|
| + */
|
| +
|
| +- MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
|
| ++ for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
|
| ++ dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
|
| +
|
| + p = 0;
|
| + for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
|
| +@@ -243,8 +248,7 @@
|
| + /* Generate left-justified code followed by all possible bit sequences */
|
| + lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
|
| + for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
|
| +- dtbl->look_nbits[lookbits] = l;
|
| +- dtbl->look_sym[lookbits] = htbl->huffval[p];
|
| ++ dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
|
| + lookbits++;
|
| + }
|
| + }
|
| +@@ -389,6 +393,50 @@
|
| + }
|
| +
|
| +
|
| ++/* Macro version of the above, which performs much better but does not
|
| ++ handle markers. We have to hand off any blocks with markers to the
|
| ++ slower routines. */
|
| ++
|
| ++#define GET_BYTE \
|
| ++{ \
|
| ++ register int c0, c1; \
|
| ++ c0 = GETJOCTET(*buffer++); \
|
| ++ c1 = GETJOCTET(*buffer); \
|
| ++ /* Pre-execute most common case */ \
|
| ++ get_buffer = (get_buffer << 8) | c0; \
|
| ++ bits_left += 8; \
|
| ++ if (c0 == 0xFF) { \
|
| ++ /* Pre-execute case of FF/00, which represents an FF data byte */ \
|
| ++ buffer++; \
|
| ++ if (c1 != 0) { \
|
| ++ /* Oops, it's actually a marker indicating end of compressed data. */ \
|
| ++ cinfo->unread_marker = c1; \
|
| ++ /* Back out pre-execution and fill the buffer with zero bits */ \
|
| ++ buffer -= 2; \
|
| ++ get_buffer &= ~0xFF; \
|
| ++ } \
|
| ++ } \
|
| ++}
|
| ++
|
| ++#if __WORDSIZE == 64 || defined(_WIN64)
|
| ++
|
| ++/* Pre-fetch 48 bits, because the holding register is 64-bit */
|
| ++#define FILL_BIT_BUFFER_FAST \
|
| ++ if (bits_left < 16) { \
|
| ++ GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \
|
| ++ }
|
| ++
|
| ++#else
|
| ++
|
| ++/* Pre-fetch 16 bits, because the holding register is 32-bit */
|
| ++#define FILL_BIT_BUFFER_FAST \
|
| ++ if (bits_left < 16) { \
|
| ++ GET_BYTE GET_BYTE \
|
| ++ }
|
| ++
|
| ++#endif
|
| ++
|
| ++
|
| + /*
|
| + * Out-of-line code for Huffman code decoding.
|
| + * See jdhuff.h for info about usage.
|
| +@@ -438,9 +486,10 @@
|
| + * On some machines, a shift and add will be faster than a table lookup.
|
| + */
|
| +
|
| ++#define AVOID_TABLES
|
| + #ifdef AVOID_TABLES
|
| +
|
| +-#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
|
| ++#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1)))
|
| +
|
| + #else
|
| +
|
| +@@ -498,6 +547,191 @@
|
| + }
|
| +
|
| +
|
| ++LOCAL(boolean)
|
| ++decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
| ++{
|
| ++ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
| ++ BITREAD_STATE_VARS;
|
| ++ int blkn;
|
| ++ savable_state state;
|
| ++ /* Outer loop handles each block in the MCU */
|
| ++
|
| ++ /* Load up working state */
|
| ++ BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
| ++ ASSIGN_STATE(state, entropy->saved);
|
| ++
|
| ++ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
| ++ JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
|
| ++ d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
| ++ d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
|
| ++ register int s, k, r;
|
| ++
|
| ++ /* Decode a single block's worth of coefficients */
|
| ++
|
| ++ /* Section F.2.2.1: decode the DC coefficient difference */
|
| ++ HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
|
| ++ if (s) {
|
| ++ CHECK_BIT_BUFFER(br_state, s, return FALSE);
|
| ++ r = GET_BITS(s);
|
| ++ s = HUFF_EXTEND(r, s);
|
| ++ }
|
| ++
|
| ++ if (entropy->dc_needed[blkn]) {
|
| ++ /* Convert DC difference to actual value, update last_dc_val */
|
| ++ int ci = cinfo->MCU_membership[blkn];
|
| ++ s += state.last_dc_val[ci];
|
| ++ state.last_dc_val[ci] = s;
|
| ++ if (block) {
|
| ++ /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
|
| ++ (*block)[0] = (JCOEF) s;
|
| ++ }
|
| ++ }
|
| ++
|
| ++ if (entropy->ac_needed[blkn] && block) {
|
| ++
|
| ++ /* Section F.2.2.2: decode the AC coefficients */
|
| ++ /* Since zeroes are skipped, output area must be cleared beforehand */
|
| ++ for (k = 1; k < DCTSIZE2; k++) {
|
| ++ HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
|
| ++
|
| ++ r = s >> 4;
|
| ++ s &= 15;
|
| ++
|
| ++ if (s) {
|
| ++ k += r;
|
| ++ CHECK_BIT_BUFFER(br_state, s, return FALSE);
|
| ++ r = GET_BITS(s);
|
| ++ s = HUFF_EXTEND(r, s);
|
| ++ /* Output coefficient in natural (dezigzagged) order.
|
| ++ * Note: the extra entries in jpeg_natural_order[] will save us
|
| ++ * if k >= DCTSIZE2, which could happen if the data is corrupted.
|
| ++ */
|
| ++ (*block)[jpeg_natural_order[k]] = (JCOEF) s;
|
| ++ } else {
|
| ++ if (r != 15)
|
| ++ break;
|
| ++ k += 15;
|
| ++ }
|
| ++ }
|
| ++
|
| ++ } else {
|
| ++
|
| ++ /* Section F.2.2.2: decode the AC coefficients */
|
| ++ /* In this path we just discard the values */
|
| ++ for (k = 1; k < DCTSIZE2; k++) {
|
| ++ HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
|
| ++
|
| ++ r = s >> 4;
|
| ++ s &= 15;
|
| ++
|
| ++ if (s) {
|
| ++ k += r;
|
| ++ CHECK_BIT_BUFFER(br_state, s, return FALSE);
|
| ++ DROP_BITS(s);
|
| ++ } else {
|
| ++ if (r != 15)
|
| ++ break;
|
| ++ k += 15;
|
| ++ }
|
| ++ }
|
| ++ }
|
| ++ }
|
| ++
|
| ++ /* Completed MCU, so update state */
|
| ++ BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
| ++ ASSIGN_STATE(entropy->saved, state);
|
| ++ return TRUE;
|
| ++}
|
| ++
|
| ++
|
| ++LOCAL(boolean)
|
| ++decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
| ++{
|
| ++ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
| ++ BITREAD_STATE_VARS;
|
| ++ JOCTET *buffer;
|
| ++ int blkn;
|
| ++ savable_state state;
|
| ++ /* Outer loop handles each block in the MCU */
|
| ++
|
| ++ /* Load up working state */
|
| ++ BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
| ++ buffer = (JOCTET *) br_state.next_input_byte;
|
| ++ ASSIGN_STATE(state, entropy->saved);
|
| ++
|
| ++ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
| ++ JBLOCKROW block = MCU_data[blkn];
|
| ++ d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
| ++ d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
|
| ++ register int s, k, r, l;
|
| ++
|
| ++ HUFF_DECODE_FAST(s, l, dctbl, slow_decode_mcu);
|
| ++ if (s) {
|
| ++ FILL_BIT_BUFFER_FAST
|
| ++ r = GET_BITS(s);
|
| ++ s = HUFF_EXTEND(r, s);
|
| ++ }
|
| ++
|
| ++ if (entropy->dc_needed[blkn]) {
|
| ++ int ci = cinfo->MCU_membership[blkn];
|
| ++ s += state.last_dc_val[ci];
|
| ++ state.last_dc_val[ci] = s;
|
| ++ if (block)
|
| ++ (*block)[0] = (JCOEF) s;
|
| ++ }
|
| ++
|
| ++ if (entropy->ac_needed[blkn] && block) {
|
| ++
|
| ++ for (k = 1; k < DCTSIZE2; k++) {
|
| ++ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu);
|
| ++ r = s >> 4;
|
| ++ s &= 15;
|
| ++
|
| ++ if (s) {
|
| ++ k += r;
|
| ++ FILL_BIT_BUFFER_FAST
|
| ++ r = GET_BITS(s);
|
| ++ s = HUFF_EXTEND(r, s);
|
| ++ (*block)[jpeg_natural_order[k]] = (JCOEF) s;
|
| ++ } else {
|
| ++ if (r != 15) break;
|
| ++ k += 15;
|
| ++ }
|
| ++ }
|
| ++
|
| ++ } else {
|
| ++
|
| ++ for (k = 1; k < DCTSIZE2; k++) {
|
| ++ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu);
|
| ++ r = s >> 4;
|
| ++ s &= 15;
|
| ++
|
| ++ if (s) {
|
| ++ k += r;
|
| ++ FILL_BIT_BUFFER_FAST
|
| ++ DROP_BITS(s);
|
| ++ } else {
|
| ++ if (r != 15) break;
|
| ++ k += 15;
|
| ++ }
|
| ++ }
|
| ++ }
|
| ++ }
|
| ++
|
| ++ if (cinfo->unread_marker != 0) {
|
| ++slow_decode_mcu:
|
| ++ cinfo->unread_marker = 0;
|
| ++ return FALSE;
|
| ++ }
|
| ++
|
| ++ br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
|
| ++ br_state.next_input_byte = buffer;
|
| ++ BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
| ++ ASSIGN_STATE(entropy->saved, state);
|
| ++ return TRUE;
|
| ++}
|
| ++
|
| ++
|
| + /*
|
| + * Decode and return one MCU's worth of Huffman-compressed coefficients.
|
| + * The coefficients are reordered from zigzag order into natural array order,
|
| +@@ -513,13 +747,13 @@
|
| + * this module, since we'll just re-assign them on the next call.)
|
| + */
|
| +
|
| ++#define BUFSIZE (DCTSIZE2 * 2u)
|
| ++
|
| + METHODDEF(boolean)
|
| + decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
| + {
|
| + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
| +- int blkn;
|
| +- BITREAD_STATE_VARS;
|
| +- savable_state state;
|
| ++ int usefast = 1;
|
| +
|
| + /* Process restart marker if needed; may have to suspend */
|
| + if (cinfo->restart_interval) {
|
| +@@ -526,98 +760,26 @@
|
| + if (entropy->restarts_to_go == 0)
|
| + if (! process_restart(cinfo))
|
| + return FALSE;
|
| ++ usefast = 0;
|
| + }
|
| +
|
| ++ if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU
|
| ++ || cinfo->unread_marker != 0)
|
| ++ usefast = 0;
|
| ++
|
| + /* If we've run out of data, just leave the MCU set to zeroes.
|
| + * This way, we return uniform gray for the remainder of the segment.
|
| + */
|
| + if (! entropy->pub.insufficient_data) {
|
| +
|
| +- /* Load up working state */
|
| +- BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
| +- ASSIGN_STATE(state, entropy->saved);
|
| +-
|
| +- /* Outer loop handles each block in the MCU */
|
| +-
|
| +- for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
| +- JBLOCKROW block = MCU_data[blkn];
|
| +- d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
| +- d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
|
| +- register int s, k, r;
|
| +-
|
| +- /* Decode a single block's worth of coefficients */
|
| +-
|
| +- /* Section F.2.2.1: decode the DC coefficient difference */
|
| +- HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
|
| +- if (s) {
|
| +- CHECK_BIT_BUFFER(br_state, s, return FALSE);
|
| +- r = GET_BITS(s);
|
| +- s = HUFF_EXTEND(r, s);
|
| +- }
|
| +-
|
| +- if (entropy->dc_needed[blkn]) {
|
| +- /* Convert DC difference to actual value, update last_dc_val */
|
| +- int ci = cinfo->MCU_membership[blkn];
|
| +- s += state.last_dc_val[ci];
|
| +- state.last_dc_val[ci] = s;
|
| +- /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
|
| +- (*block)[0] = (JCOEF) s;
|
| +- }
|
| +-
|
| +- if (entropy->ac_needed[blkn]) {
|
| +-
|
| +- /* Section F.2.2.2: decode the AC coefficients */
|
| +- /* Since zeroes are skipped, output area must be cleared beforehand */
|
| +- for (k = 1; k < DCTSIZE2; k++) {
|
| +- HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
|
| +-
|
| +- r = s >> 4;
|
| +- s &= 15;
|
| +-
|
| +- if (s) {
|
| +- k += r;
|
| +- CHECK_BIT_BUFFER(br_state, s, return FALSE);
|
| +- r = GET_BITS(s);
|
| +- s = HUFF_EXTEND(r, s);
|
| +- /* Output coefficient in natural (dezigzagged) order.
|
| +- * Note: the extra entries in jpeg_natural_order[] will save us
|
| +- * if k >= DCTSIZE2, which could happen if the data is corrupted.
|
| +- */
|
| +- (*block)[jpeg_natural_order[k]] = (JCOEF) s;
|
| +- } else {
|
| +- if (r != 15)
|
| +- break;
|
| +- k += 15;
|
| +- }
|
| +- }
|
| +-
|
| +- } else {
|
| +-
|
| +- /* Section F.2.2.2: decode the AC coefficients */
|
| +- /* In this path we just discard the values */
|
| +- for (k = 1; k < DCTSIZE2; k++) {
|
| +- HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
|
| +-
|
| +- r = s >> 4;
|
| +- s &= 15;
|
| +-
|
| +- if (s) {
|
| +- k += r;
|
| +- CHECK_BIT_BUFFER(br_state, s, return FALSE);
|
| +- DROP_BITS(s);
|
| +- } else {
|
| +- if (r != 15)
|
| +- break;
|
| +- k += 15;
|
| +- }
|
| +- }
|
| +-
|
| +- }
|
| ++ if (usefast) {
|
| ++ if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow;
|
| + }
|
| ++ else {
|
| ++ use_slow:
|
| ++ if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
|
| ++ }
|
| +
|
| +- /* Completed MCU, so update state */
|
| +- BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
| +- ASSIGN_STATE(entropy->saved, state);
|
| + }
|
| +
|
| + /* Account for restart interval (no-op if not using restarts) */
|
| +Index: jdhuff.h
|
| +===================================================================
|
| +--- jdhuff.h (revision 829)
|
| ++++ jdhuff.h (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jdhuff.h
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modifications:
|
| ++ * Copyright (C) 2010-2011, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains declarations for Huffman entropy decoding routines
|
| +@@ -27,7 +29,7 @@
|
| + /* Basic tables: (element [0] of each array is unused) */
|
| + INT32 maxcode[18]; /* largest code of length k (-1 if none) */
|
| + /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
|
| +- INT32 valoffset[17]; /* huffval[] offset for codes of length k */
|
| ++ INT32 valoffset[18]; /* huffval[] offset for codes of length k */
|
| + /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
|
| + * the smallest code of length k; so given a code of length k, the
|
| + * corresponding symbol is huffval[code + valoffset[k]]
|
| +@@ -36,13 +38,17 @@
|
| + /* Link to public Huffman table (needed only in jpeg_huff_decode) */
|
| + JHUFF_TBL *pub;
|
| +
|
| +- /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
|
| ++ /* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of
|
| + * the input data stream. If the next Huffman code is no more
|
| + * than HUFF_LOOKAHEAD bits long, we can obtain its length and
|
| +- * the corresponding symbol directly from these tables.
|
| ++ * the corresponding symbol directly from this table.
|
| ++ *
|
| ++ * The lower HUFF_LOOKAHEAD bits of each table entry contain the
|
| ++ * symbol. The bits above them contain the number of bits in the
|
| ++ * corresponding Huffman code, or HUFF_LOOKAHEAD + 1 if the code
|
| ++ * is too long.
|
| + */
|
| +- int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
|
| +- UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
|
| ++ int lookup[1<<HUFF_LOOKAHEAD];
|
| + } d_derived_tbl;
|
| +
|
| + /* Expand a Huffman table definition into the derived format */
|
| +@@ -69,9 +75,18 @@
|
| + * necessary.
|
| + */
|
| +
|
| ++#if __WORDSIZE == 64 || defined(_WIN64)
|
| ++
|
| ++typedef size_t bit_buf_type; /* type of bit-extraction buffer */
|
| ++#define BIT_BUF_SIZE 64 /* size of buffer in bits */
|
| ++
|
| ++#else
|
| ++
|
| + typedef INT32 bit_buf_type; /* type of bit-extraction buffer */
|
| +-#define BIT_BUF_SIZE 32 /* size of buffer in bits */
|
| ++#define BIT_BUF_SIZE 32 /* size of buffer in bits */
|
| +
|
| ++#endif
|
| ++
|
| + /* If long is > 32 bits on your machine, and shifting/masking longs is
|
| + * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
|
| + * appropriately should be a win. Unfortunately we can't define the size
|
| +@@ -183,11 +198,10 @@
|
| + } \
|
| + } \
|
| + look = PEEK_BITS(HUFF_LOOKAHEAD); \
|
| +- if ((nb = htbl->look_nbits[look]) != 0) { \
|
| ++ if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \
|
| + DROP_BITS(nb); \
|
| +- result = htbl->look_sym[look]; \
|
| ++ result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \
|
| + } else { \
|
| +- nb = HUFF_LOOKAHEAD+1; \
|
| + slowlabel: \
|
| + if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
|
| + { failaction; } \
|
| +@@ -195,6 +209,28 @@
|
| + } \
|
| + }
|
| +
|
| ++#define HUFF_DECODE_FAST(s,nb,htbl,slowlabel) \
|
| ++ FILL_BIT_BUFFER_FAST; \
|
| ++ s = PEEK_BITS(HUFF_LOOKAHEAD); \
|
| ++ s = htbl->lookup[s]; \
|
| ++ nb = s >> HUFF_LOOKAHEAD; \
|
| ++ /* Pre-execute the common case of nb <= HUFF_LOOKAHEAD */ \
|
| ++ DROP_BITS(nb); \
|
| ++ s = s & ((1 << HUFF_LOOKAHEAD) - 1); \
|
| ++ if (nb > HUFF_LOOKAHEAD) { \
|
| ++ /* Equivalent of jpeg_huff_decode() */ \
|
| ++ /* Don't use GET_BITS() here because we don't want to modify bits_left */ \
|
| ++ s = (get_buffer >> bits_left) & ((1 << (nb)) - 1); \
|
| ++ while (s > htbl->maxcode[nb]) { \
|
| ++ s <<= 1; \
|
| ++ s |= GET_BITS(1); \
|
| ++ nb++; \
|
| ++ } \
|
| ++ if (nb > 16) \
|
| ++ goto slowlabel; \
|
| ++ s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) ]; \
|
| ++ }
|
| ++
|
| + /* Out-of-line case for Huffman code fetching */
|
| + EXTERN(int) jpeg_huff_decode
|
| + JPP((bitread_working_state * state, register bit_buf_type get_buffer,
|
| +Index: jdinput.c
|
| +===================================================================
|
| +--- jdinput.c (revision 829)
|
| ++++ jdinput.c (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jdinput.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains input control logic for the JPEG decompressor.
|
| +@@ -14,6 +16,7 @@
|
| + #define JPEG_INTERNALS
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| + /* Private state */
|
| +@@ -70,16 +73,30 @@
|
| + compptr->v_samp_factor);
|
| + }
|
| +
|
| ++#if JPEG_LIB_VERSION >=80
|
| ++ cinfo->block_size = DCTSIZE;
|
| ++ cinfo->natural_order = jpeg_natural_order;
|
| ++ cinfo->lim_Se = DCTSIZE2-1;
|
| ++#endif
|
| ++
|
| + /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE.
|
| + * In the full decompressor, this will be overridden by jdmaster.c;
|
| + * but in the transcoder, jdmaster.c is not used, so we must do it here.
|
| + */
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
|
| ++#else
|
| + cinfo->min_DCT_scaled_size = DCTSIZE;
|
| ++#endif
|
| +
|
| + /* Compute dimensions of components */
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
|
| ++#else
|
| + compptr->DCT_scaled_size = DCTSIZE;
|
| ++#endif
|
| + /* Size in DCT blocks */
|
| + compptr->width_in_blocks = (JDIMENSION)
|
| + jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
|
| +@@ -138,7 +155,7 @@
|
| + compptr->MCU_width = 1;
|
| + compptr->MCU_height = 1;
|
| + compptr->MCU_blocks = 1;
|
| +- compptr->MCU_sample_width = compptr->DCT_scaled_size;
|
| ++ compptr->MCU_sample_width = compptr->_DCT_scaled_size;
|
| + compptr->last_col_width = 1;
|
| + /* For noninterleaved scans, it is convenient to define last_row_height
|
| + * as the number of block rows present in the last iMCU row.
|
| +@@ -174,7 +191,7 @@
|
| + compptr->MCU_width = compptr->h_samp_factor;
|
| + compptr->MCU_height = compptr->v_samp_factor;
|
| + compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
|
| +- compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size;
|
| ++ compptr->MCU_sample_width = compptr->MCU_width * compptr->_DCT_scaled_size;
|
| + /* Figure number of non-dummy blocks in last MCU column & row */
|
| + tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
|
| + if (tmp == 0) tmp = compptr->MCU_width;
|
| +Index: jdmainct.c
|
| +===================================================================
|
| +--- jdmainct.c (revision 829)
|
| ++++ jdmainct.c (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jdmainct.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains the main buffer controller for decompression.
|
| +@@ -13,9 +15,7 @@
|
| + * supplies the equivalent of the main buffer in that case.
|
| + */
|
| +
|
| +-#define JPEG_INTERNALS
|
| +-#include "jinclude.h"
|
| +-#include "jpeglib.h"
|
| ++#include "jdmainct.h"
|
| +
|
| +
|
| + /*
|
| +@@ -109,36 +109,6 @@
|
| + */
|
| +
|
| +
|
| +-/* Private buffer controller object */
|
| +-
|
| +-typedef struct {
|
| +- struct jpeg_d_main_controller pub; /* public fields */
|
| +-
|
| +- /* Pointer to allocated workspace (M or M+2 row groups). */
|
| +- JSAMPARRAY buffer[MAX_COMPONENTS];
|
| +-
|
| +- boolean buffer_full; /* Have we gotten an iMCU row from decoder? */
|
| +- JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */
|
| +-
|
| +- /* Remaining fields are only used in the context case. */
|
| +-
|
| +- /* These are the master pointers to the funny-order pointer lists. */
|
| +- JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */
|
| +-
|
| +- int whichptr; /* indicates which pointer set is now in use */
|
| +- int context_state; /* process_data state machine status */
|
| +- JDIMENSION rowgroups_avail; /* row groups available to postprocessor */
|
| +- JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */
|
| +-} my_main_controller;
|
| +-
|
| +-typedef my_main_controller * my_main_ptr;
|
| +-
|
| +-/* context_state values: */
|
| +-#define CTX_PREPARE_FOR_IMCU 0 /* need to prepare for MCU row */
|
| +-#define CTX_PROCESS_IMCU 1 /* feeding iMCU to postprocessor */
|
| +-#define CTX_POSTPONED_ROW 2 /* feeding postponed row group */
|
| +-
|
| +-
|
| + /* Forward declarations */
|
| + METHODDEF(void) process_data_simple_main
|
| + JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
|
| +@@ -159,9 +129,9 @@
|
| + * This is done only once, not once per pass.
|
| + */
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| + int ci, rgroup;
|
| +- int M = cinfo->min_DCT_scaled_size;
|
| ++ int M = cinfo->_min_DCT_scaled_size;
|
| + jpeg_component_info *compptr;
|
| + JSAMPARRAY xbuf;
|
| +
|
| +@@ -168,15 +138,15 @@
|
| + /* Get top-level space for component array pointers.
|
| + * We alloc both arrays with one call to save a few cycles.
|
| + */
|
| +- main->xbuffer[0] = (JSAMPIMAGE)
|
| ++ main_ptr->xbuffer[0] = (JSAMPIMAGE)
|
| + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| + cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
|
| +- main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components;
|
| ++ main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;
|
| +
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
|
| +- cinfo->min_DCT_scaled_size; /* height of a row group of component */
|
| ++ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
| ++ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
| + /* Get space for pointer lists --- M+4 row groups in each list.
|
| + * We alloc both pointer lists with one call to save a few cycles.
|
| + */
|
| +@@ -184,9 +154,9 @@
|
| + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| + 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
|
| + xbuf += rgroup; /* want one row group at negative offsets */
|
| +- main->xbuffer[0][ci] = xbuf;
|
| ++ main_ptr->xbuffer[0][ci] = xbuf;
|
| + xbuf += rgroup * (M + 4);
|
| +- main->xbuffer[1][ci] = xbuf;
|
| ++ main_ptr->xbuffer[1][ci] = xbuf;
|
| + }
|
| + }
|
| +
|
| +@@ -194,26 +164,26 @@
|
| + LOCAL(void)
|
| + make_funny_pointers (j_decompress_ptr cinfo)
|
| + /* Create the funny pointer lists discussed in the comments above.
|
| +- * The actual workspace is already allocated (in main->buffer),
|
| ++ * The actual workspace is already allocated (in main_ptr->buffer),
|
| + * and the space for the pointer lists is allocated too.
|
| + * This routine just fills in the curiously ordered lists.
|
| + * This will be repeated at the beginning of each pass.
|
| + */
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| + int ci, i, rgroup;
|
| +- int M = cinfo->min_DCT_scaled_size;
|
| ++ int M = cinfo->_min_DCT_scaled_size;
|
| + jpeg_component_info *compptr;
|
| + JSAMPARRAY buf, xbuf0, xbuf1;
|
| +
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
|
| +- cinfo->min_DCT_scaled_size; /* height of a row group of component */
|
| +- xbuf0 = main->xbuffer[0][ci];
|
| +- xbuf1 = main->xbuffer[1][ci];
|
| ++ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
| ++ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
| ++ xbuf0 = main_ptr->xbuffer[0][ci];
|
| ++ xbuf1 = main_ptr->xbuffer[1][ci];
|
| + /* First copy the workspace pointers as-is */
|
| +- buf = main->buffer[ci];
|
| ++ buf = main_ptr->buffer[ci];
|
| + for (i = 0; i < rgroup * (M + 2); i++) {
|
| + xbuf0[i] = xbuf1[i] = buf[i];
|
| + }
|
| +@@ -235,34 +205,6 @@
|
| +
|
| +
|
| + LOCAL(void)
|
| +-set_wraparound_pointers (j_decompress_ptr cinfo)
|
| +-/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
|
| +- * This changes the pointer list state from top-of-image to the normal state.
|
| +- */
|
| +-{
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| +- int ci, i, rgroup;
|
| +- int M = cinfo->min_DCT_scaled_size;
|
| +- jpeg_component_info *compptr;
|
| +- JSAMPARRAY xbuf0, xbuf1;
|
| +-
|
| +- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| +- ci++, compptr++) {
|
| +- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
|
| +- cinfo->min_DCT_scaled_size; /* height of a row group of component */
|
| +- xbuf0 = main->xbuffer[0][ci];
|
| +- xbuf1 = main->xbuffer[1][ci];
|
| +- for (i = 0; i < rgroup; i++) {
|
| +- xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
|
| +- xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
|
| +- xbuf0[rgroup*(M+2) + i] = xbuf0[i];
|
| +- xbuf1[rgroup*(M+2) + i] = xbuf1[i];
|
| +- }
|
| +- }
|
| +-}
|
| +-
|
| +-
|
| +-LOCAL(void)
|
| + set_bottom_pointers (j_decompress_ptr cinfo)
|
| + /* Change the pointer lists to duplicate the last sample row at the bottom
|
| + * of the image. whichptr indicates which xbuffer holds the final iMCU row.
|
| +@@ -269,7 +211,7 @@
|
| + * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
|
| + */
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| + int ci, i, rgroup, iMCUheight, rows_left;
|
| + jpeg_component_info *compptr;
|
| + JSAMPARRAY xbuf;
|
| +@@ -277,8 +219,8 @@
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| + /* Count sample rows in one iMCU row and in one row group */
|
| +- iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
|
| +- rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
|
| ++ iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size;
|
| ++ rgroup = iMCUheight / cinfo->_min_DCT_scaled_size;
|
| + /* Count nondummy sample rows remaining for this component */
|
| + rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
|
| + if (rows_left == 0) rows_left = iMCUheight;
|
| +@@ -286,12 +228,12 @@
|
| + * so we need only do it once.
|
| + */
|
| + if (ci == 0) {
|
| +- main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
|
| ++ main_ptr->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
|
| + }
|
| + /* Duplicate the last real sample row rgroup*2 times; this pads out the
|
| + * last partial rowgroup and ensures at least one full rowgroup of context.
|
| + */
|
| +- xbuf = main->xbuffer[main->whichptr][ci];
|
| ++ xbuf = main_ptr->xbuffer[main_ptr->whichptr][ci];
|
| + for (i = 0; i < rgroup * 2; i++) {
|
| + xbuf[rows_left + i] = xbuf[rows_left-1];
|
| + }
|
| +@@ -306,27 +248,27 @@
|
| + METHODDEF(void)
|
| + start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| +
|
| + switch (pass_mode) {
|
| + case JBUF_PASS_THRU:
|
| + if (cinfo->upsample->need_context_rows) {
|
| +- main->pub.process_data = process_data_context_main;
|
| ++ main_ptr->pub.process_data = process_data_context_main;
|
| + make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
|
| +- main->whichptr = 0; /* Read first iMCU row into xbuffer[0] */
|
| +- main->context_state = CTX_PREPARE_FOR_IMCU;
|
| +- main->iMCU_row_ctr = 0;
|
| ++ main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */
|
| ++ main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
|
| ++ main_ptr->iMCU_row_ctr = 0;
|
| + } else {
|
| + /* Simple case with no context needed */
|
| +- main->pub.process_data = process_data_simple_main;
|
| ++ main_ptr->pub.process_data = process_data_simple_main;
|
| + }
|
| +- main->buffer_full = FALSE; /* Mark buffer empty */
|
| +- main->rowgroup_ctr = 0;
|
| ++ main_ptr->buffer_full = FALSE; /* Mark buffer empty */
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| + break;
|
| + #ifdef QUANT_2PASS_SUPPORTED
|
| + case JBUF_CRANK_DEST:
|
| + /* For last pass of 2-pass quantization, just crank the postprocessor */
|
| +- main->pub.process_data = process_data_crank_post;
|
| ++ main_ptr->pub.process_data = process_data_crank_post;
|
| + break;
|
| + #endif
|
| + default:
|
| +@@ -346,18 +288,18 @@
|
| + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
|
| + JDIMENSION out_rows_avail)
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| + JDIMENSION rowgroups_avail;
|
| +
|
| + /* Read input data if we haven't filled the main buffer yet */
|
| +- if (! main->buffer_full) {
|
| +- if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer))
|
| ++ if (! main_ptr->buffer_full) {
|
| ++ if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer))
|
| + return; /* suspension forced, can do nothing more */
|
| +- main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
|
| ++ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
|
| + }
|
| +
|
| + /* There are always min_DCT_scaled_size row groups in an iMCU row. */
|
| +- rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size;
|
| ++ rowgroups_avail = (JDIMENSION) cinfo->_min_DCT_scaled_size;
|
| + /* Note: at the bottom of the image, we may pass extra garbage row groups
|
| + * to the postprocessor. The postprocessor has to check for bottom
|
| + * of image anyway (at row resolution), so no point in us doing it too.
|
| +@@ -364,14 +306,14 @@
|
| + */
|
| +
|
| + /* Feed the postprocessor */
|
| +- (*cinfo->post->post_process_data) (cinfo, main->buffer,
|
| +- &main->rowgroup_ctr, rowgroups_avail,
|
| ++ (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer,
|
| ++ &main_ptr->rowgroup_ctr, rowgroups_avail,
|
| + output_buf, out_row_ctr, out_rows_avail);
|
| +
|
| + /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
|
| +- if (main->rowgroup_ctr >= rowgroups_avail) {
|
| +- main->buffer_full = FALSE;
|
| +- main->rowgroup_ctr = 0;
|
| ++ if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
|
| ++ main_ptr->buffer_full = FALSE;
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| + }
|
| + }
|
| +
|
| +@@ -386,15 +328,15 @@
|
| + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
|
| + JDIMENSION out_rows_avail)
|
| + {
|
| +- my_main_ptr main = (my_main_ptr) cinfo->main;
|
| ++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
| +
|
| + /* Read input data if we haven't filled the main buffer yet */
|
| +- if (! main->buffer_full) {
|
| ++ if (! main_ptr->buffer_full) {
|
| + if (! (*cinfo->coef->decompress_data) (cinfo,
|
| +- main->xbuffer[main->whichptr]))
|
| ++ main_ptr->xbuffer[main_ptr->whichptr]))
|
| + return; /* suspension forced, can do nothing more */
|
| +- main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
|
| +- main->iMCU_row_ctr++; /* count rows received */
|
| ++ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
|
| ++ main_ptr->iMCU_row_ctr++; /* count rows received */
|
| + }
|
| +
|
| + /* Postprocessor typically will not swallow all the input data it is handed
|
| +@@ -402,47 +344,47 @@
|
| + * to exit and restart. This switch lets us keep track of how far we got.
|
| + * Note that each case falls through to the next on successful completion.
|
| + */
|
| +- switch (main->context_state) {
|
| ++ switch (main_ptr->context_state) {
|
| + case CTX_POSTPONED_ROW:
|
| + /* Call postprocessor using previously set pointers for postponed row */
|
| +- (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
|
| +- &main->rowgroup_ctr, main->rowgroups_avail,
|
| ++ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
|
| ++ &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
|
| + output_buf, out_row_ctr, out_rows_avail);
|
| +- if (main->rowgroup_ctr < main->rowgroups_avail)
|
| ++ if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
|
| + return; /* Need to suspend */
|
| +- main->context_state = CTX_PREPARE_FOR_IMCU;
|
| ++ main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
|
| + if (*out_row_ctr >= out_rows_avail)
|
| + return; /* Postprocessor exactly filled output buf */
|
| + /*FALLTHROUGH*/
|
| + case CTX_PREPARE_FOR_IMCU:
|
| + /* Prepare to process first M-1 row groups of this iMCU row */
|
| +- main->rowgroup_ctr = 0;
|
| +- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
|
| ++ main_ptr->rowgroup_ctr = 0;
|
| ++ main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1);
|
| + /* Check for bottom of image: if so, tweak pointers to "duplicate"
|
| + * the last sample row, and adjust rowgroups_avail to ignore padding rows.
|
| + */
|
| +- if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
|
| ++ if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows)
|
| + set_bottom_pointers(cinfo);
|
| +- main->context_state = CTX_PROCESS_IMCU;
|
| ++ main_ptr->context_state = CTX_PROCESS_IMCU;
|
| + /*FALLTHROUGH*/
|
| + case CTX_PROCESS_IMCU:
|
| + /* Call postprocessor using previously set pointers */
|
| +- (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
|
| +- &main->rowgroup_ctr, main->rowgroups_avail,
|
| ++ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
|
| ++ &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
|
| + output_buf, out_row_ctr, out_rows_avail);
|
| +- if (main->rowgroup_ctr < main->rowgroups_avail)
|
| ++ if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
|
| + return; /* Need to suspend */
|
| + /* After the first iMCU, change wraparound pointers to normal state */
|
| +- if (main->iMCU_row_ctr == 1)
|
| ++ if (main_ptr->iMCU_row_ctr == 1)
|
| + set_wraparound_pointers(cinfo);
|
| + /* Prepare to load new iMCU row using other xbuffer list */
|
| +- main->whichptr ^= 1; /* 0=>1 or 1=>0 */
|
| +- main->buffer_full = FALSE;
|
| ++ main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */
|
| ++ main_ptr->buffer_full = FALSE;
|
| + /* Still need to process last row group of this iMCU row, */
|
| + /* which is saved at index M+1 of the other xbuffer */
|
| +- main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
|
| +- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
|
| +- main->context_state = CTX_POSTPONED_ROW;
|
| ++ main_ptr->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1);
|
| ++ main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2);
|
| ++ main_ptr->context_state = CTX_POSTPONED_ROW;
|
| + }
|
| + }
|
| +
|
| +@@ -475,15 +417,15 @@
|
| + GLOBAL(void)
|
| + jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
|
| + {
|
| +- my_main_ptr main;
|
| ++ my_main_ptr main_ptr;
|
| + int ci, rgroup, ngroups;
|
| + jpeg_component_info *compptr;
|
| +
|
| +- main = (my_main_ptr)
|
| ++ main_ptr = (my_main_ptr)
|
| + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| + SIZEOF(my_main_controller));
|
| +- cinfo->main = (struct jpeg_d_main_controller *) main;
|
| +- main->pub.start_pass = start_pass_main;
|
| ++ cinfo->main = (struct jpeg_d_main_controller *) main_ptr;
|
| ++ main_ptr->pub.start_pass = start_pass_main;
|
| +
|
| + if (need_full_buffer) /* shouldn't happen */
|
| + ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
| +@@ -492,21 +434,21 @@
|
| + * ngroups is the number of row groups we need.
|
| + */
|
| + if (cinfo->upsample->need_context_rows) {
|
| +- if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */
|
| ++ if (cinfo->_min_DCT_scaled_size < 2) /* unsupported, see comments above */
|
| + ERREXIT(cinfo, JERR_NOTIMPL);
|
| + alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
|
| +- ngroups = cinfo->min_DCT_scaled_size + 2;
|
| ++ ngroups = cinfo->_min_DCT_scaled_size + 2;
|
| + } else {
|
| +- ngroups = cinfo->min_DCT_scaled_size;
|
| ++ ngroups = cinfo->_min_DCT_scaled_size;
|
| + }
|
| +
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
|
| +- cinfo->min_DCT_scaled_size; /* height of a row group of component */
|
| +- main->buffer[ci] = (*cinfo->mem->alloc_sarray)
|
| ++ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
| ++ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
| ++ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
|
| + ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
| +- compptr->width_in_blocks * compptr->DCT_scaled_size,
|
| ++ compptr->width_in_blocks * compptr->_DCT_scaled_size,
|
| + (JDIMENSION) (rgroup * ngroups));
|
| + }
|
| + }
|
| +Index: jdmarker.c
|
| +===================================================================
|
| +--- jdmarker.c (revision 829)
|
| ++++ jdmarker.c (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jdmarker.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1998, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2012, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains routines to decode JPEG datastream markers.
|
| +@@ -302,7 +304,7 @@
|
| + /* Process a SOS marker */
|
| + {
|
| + INT32 length;
|
| +- int i, ci, n, c, cc;
|
| ++ int i, ci, n, c, cc, pi;
|
| + jpeg_component_info * compptr;
|
| + INPUT_VARS(cinfo);
|
| +
|
| +@@ -322,13 +324,17 @@
|
| +
|
| + /* Collect the component-spec parameters */
|
| +
|
| ++ for (i = 0; i < MAX_COMPS_IN_SCAN; i++)
|
| ++ cinfo->cur_comp_info[i] = NULL;
|
| ++
|
| + for (i = 0; i < n; i++) {
|
| + INPUT_BYTE(cinfo, cc, return FALSE);
|
| + INPUT_BYTE(cinfo, c, return FALSE);
|
| +
|
| +- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| ++ for (ci = 0, compptr = cinfo->comp_info;
|
| ++ ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN;
|
| + ci++, compptr++) {
|
| +- if (cc == compptr->component_id)
|
| ++ if (cc == compptr->component_id && !cinfo->cur_comp_info[ci])
|
| + goto id_found;
|
| + }
|
| +
|
| +@@ -342,6 +348,13 @@
|
| +
|
| + TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
|
| + compptr->dc_tbl_no, compptr->ac_tbl_no);
|
| ++
|
| ++ /* This CSi (cc) should differ from the previous CSi */
|
| ++ for (pi = 0; pi < i; pi++) {
|
| ++ if (cinfo->cur_comp_info[pi] == compptr) {
|
| ++ ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
|
| ++ }
|
| ++ }
|
| + }
|
| +
|
| + /* Collect the additional scan parameters Ss, Se, Ah/Al. */
|
| +@@ -459,18 +472,21 @@
|
| + for (i = 0; i < count; i++)
|
| + INPUT_BYTE(cinfo, huffval[i], return FALSE);
|
| +
|
| ++ MEMZERO(&huffval[count], (256 - count) * SIZEOF(UINT8));
|
| ++
|
| + length -= count;
|
| +
|
| + if (index & 0x10) { /* AC table definition */
|
| + index -= 0x10;
|
| ++ if (index < 0 || index >= NUM_HUFF_TBLS)
|
| ++ ERREXIT1(cinfo, JERR_DHT_INDEX, index);
|
| + htblptr = &cinfo->ac_huff_tbl_ptrs[index];
|
| + } else { /* DC table definition */
|
| ++ if (index < 0 || index >= NUM_HUFF_TBLS)
|
| ++ ERREXIT1(cinfo, JERR_DHT_INDEX, index);
|
| + htblptr = &cinfo->dc_huff_tbl_ptrs[index];
|
| + }
|
| +
|
| +- if (index < 0 || index >= NUM_HUFF_TBLS)
|
| +- ERREXIT1(cinfo, JERR_DHT_INDEX, index);
|
| +-
|
| + if (*htblptr == NULL)
|
| + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| +
|
| +@@ -906,7 +922,7 @@
|
| + }
|
| +
|
| + if (cinfo->marker->discarded_bytes != 0) {
|
| +- WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
|
| ++ TRACEMS2(cinfo, 1, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
|
| + cinfo->marker->discarded_bytes = 0;
|
| + }
|
| +
|
| +@@ -940,7 +956,144 @@
|
| + return TRUE;
|
| + }
|
| +
|
| ++#ifdef MOTION_JPEG_SUPPORTED
|
| +
|
| ++/* The default Huffman tables used by motion JPEG frames. When a motion JPEG
|
| ++ * frame does not have DHT tables, we should use the huffman tables suggested by
|
| ++ * the JPEG standard. Each of these tables represents a member of the JHUFF_TBL
|
| ++ * struct so we can just copy it to the corresponding JHUFF_TBL member.
|
| ++ */
|
| ++/* DC table 0 */
|
| ++LOCAL(const unsigned char) mjpg_dc0_bits[] = {
|
| ++ 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01,
|
| ++ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
| ++};
|
| ++
|
| ++LOCAL(const unsigned char) mjpg_dc0_huffval[] = {
|
| ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
| ++ 0x08, 0x09, 0x0A, 0x0B
|
| ++};
|
| ++
|
| ++/* DC table 1 */
|
| ++LOCAL(const unsigned char) mjpg_dc1_bits[] = {
|
| ++ 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
| ++ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00
|
| ++};
|
| ++
|
| ++LOCAL(const unsigned char) mjpg_dc1_huffval[] = {
|
| ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
| ++ 0x08, 0x09, 0x0A, 0x0B
|
| ++};
|
| ++
|
| ++/* AC table 0 */
|
| ++LOCAL(const unsigned char) mjpg_ac0_bits[] = {
|
| ++ 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03,
|
| ++ 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D
|
| ++};
|
| ++
|
| ++LOCAL(const unsigned char) mjpg_ac0_huffval[] = {
|
| ++ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
|
| ++ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
|
| ++ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08,
|
| ++ 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0,
|
| ++ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16,
|
| ++ 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28,
|
| ++ 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
| ++ 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
|
| ++ 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
| ++ 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
|
| ++ 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
|
| ++ 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
|
| ++ 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
|
| ++ 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
|
| ++ 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
|
| ++ 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
|
| ++ 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4,
|
| ++ 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2,
|
| ++ 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA,
|
| ++ 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
|
| ++ 0xF9, 0xFA
|
| ++};
|
| ++
|
| ++/* AC table 1 */
|
| ++LOCAL(const unsigned char) mjpg_ac1_bits[] = {
|
| ++ 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04,
|
| ++ 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77
|
| ++};
|
| ++
|
| ++LOCAL(const unsigned char) mjpg_ac1_huffval[] = {
|
| ++ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
|
| ++ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
|
| ++ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
|
| ++ 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0,
|
| ++ 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34,
|
| ++ 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26,
|
| ++ 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38,
|
| ++ 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
| ++ 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
|
| ++ 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
|
| ++ 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
|
| ++ 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
|
| ++ 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96,
|
| ++ 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
|
| ++ 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4,
|
| ++ 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
|
| ++ 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2,
|
| ++ 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA,
|
| ++ 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9,
|
| ++ 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
|
| ++ 0xF9, 0xFA
|
| +};
|
| +
|
| -+LOCAL(const unsigned char) mjpg_dc0_huffval[] = {
|
| -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
| -+ 0x08, 0x09, 0x0A, 0x0B
|
| -+};
|
| ++/* Loads the default Huffman tables used by motion JPEG frames. This function
|
| ++ * just copies the huffman tables suggested in the JPEG standard when we have
|
| ++ * not loaded them.
|
| ++ */
|
| ++LOCAL(void)
|
| ++mjpg_load_huff_tables (j_decompress_ptr cinfo)
|
| ++{
|
| ++ JHUFF_TBL *htblptr;
|
| ++
|
| ++ if (! cinfo->dc_huff_tbl_ptrs[0]) {
|
| ++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| ++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| ++ MEMCOPY(&htblptr->bits[1], mjpg_dc0_bits, SIZEOF(mjpg_dc0_bits));
|
| ++ MEMCOPY(&htblptr->huffval[0], mjpg_dc0_huffval, SIZEOF(mjpg_dc0_huffval));
|
| ++ cinfo->dc_huff_tbl_ptrs[0] = htblptr;
|
| ++ }
|
| ++
|
| ++ if (! cinfo->dc_huff_tbl_ptrs[1]) {
|
| ++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| ++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| ++ MEMCOPY(&htblptr->bits[1], mjpg_dc1_bits, SIZEOF(mjpg_dc1_bits));
|
| ++ MEMCOPY(&htblptr->huffval[0], mjpg_dc1_huffval, SIZEOF(mjpg_dc1_huffval));
|
| ++ cinfo->dc_huff_tbl_ptrs[1] = htblptr;
|
| ++ }
|
| ++
|
| ++ if (! cinfo->ac_huff_tbl_ptrs[0]) {
|
| ++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| ++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| ++ MEMCOPY(&htblptr->bits[1], mjpg_ac0_bits, SIZEOF(mjpg_ac0_bits));
|
| ++ MEMCOPY(&htblptr->huffval[0], mjpg_ac0_huffval, SIZEOF(mjpg_ac0_huffval));
|
| ++ cinfo->ac_huff_tbl_ptrs[0] = htblptr;
|
| ++ }
|
| ++
|
| ++ if (! cinfo->ac_huff_tbl_ptrs[1]) {
|
| ++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| ++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| ++ MEMCOPY(&htblptr->bits[1], mjpg_ac1_bits, SIZEOF(mjpg_ac1_bits));
|
| ++ MEMCOPY(&htblptr->huffval[0], mjpg_ac1_huffval, SIZEOF(mjpg_ac1_huffval));
|
| ++ cinfo->ac_huff_tbl_ptrs[1] = htblptr;
|
| ++ }
|
| ++}
|
| ++
|
| ++#else
|
| ++
|
| ++#define mjpg_load_huff_tables(cinfo)
|
| ++
|
| ++#endif /* MOTION_JPEG_SUPPORTED */
|
| ++
|
| ++
|
| + /*
|
| + * Read markers until SOS or EOI.
|
| + *
|
| +@@ -1009,6 +1162,7 @@
|
| + break;
|
| +
|
| + case M_SOS:
|
| ++ mjpg_load_huff_tables(cinfo);
|
| + if (! get_sos(cinfo))
|
| + return JPEG_SUSPENDED;
|
| + cinfo->unread_marker = 0; /* processed the marker */
|
| +Index: jdmaster.c
|
| +===================================================================
|
| +--- jdmaster.c (revision 829)
|
| ++++ jdmaster.c (working copy)
|
| +@@ -1,9 +1,11 @@
|
| + /*
|
| + * jdmaster.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2002-2009 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2009-2011, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains master control logic for the JPEG decompressor.
|
| +@@ -15,6 +17,7 @@
|
| + #define JPEG_INTERNALS
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| + /* Private state */
|
| +@@ -56,7 +59,11 @@
|
| + cinfo->out_color_space != JCS_EXT_BGR &&
|
| + cinfo->out_color_space != JCS_EXT_BGRX &&
|
| + cinfo->out_color_space != JCS_EXT_XBGR &&
|
| +- cinfo->out_color_space != JCS_EXT_XRGB) ||
|
| ++ cinfo->out_color_space != JCS_EXT_XRGB &&
|
| ++ cinfo->out_color_space != JCS_EXT_RGBA &&
|
| ++ cinfo->out_color_space != JCS_EXT_BGRA &&
|
| ++ cinfo->out_color_space != JCS_EXT_ABGR &&
|
| ++ cinfo->out_color_space != JCS_EXT_ARGB) ||
|
| + cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])
|
| + return FALSE;
|
| + /* and it only handles 2h1v or 2h2v sampling ratios */
|
| +@@ -68,9 +75,9 @@
|
| + cinfo->comp_info[2].v_samp_factor != 1)
|
| + return FALSE;
|
| + /* furthermore, it doesn't work if we've scaled the IDCTs differently */
|
| +- if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
|
| +- cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
|
| +- cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size)
|
| ++ if (cinfo->comp_info[0]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
|
| ++ cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
|
| ++ cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
|
| + return FALSE;
|
| + /* ??? also need to test for upsample-time rescaling, when & if supported */
|
| + return TRUE; /* by golly, it'll work... */
|
| +@@ -84,6 +91,177 @@
|
| + * Compute output image dimensions and related values.
|
| + * NOTE: this is exported for possible use by application.
|
| + * Hence it mustn't do anything that can't be done twice.
|
| ++ */
|
| ++
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++GLOBAL(void)
|
| ++#else
|
| ++LOCAL(void)
|
| ++#endif
|
| ++jpeg_core_output_dimensions (j_decompress_ptr cinfo)
|
| ++/* Do computations that are needed before master selection phase.
|
| ++ * This function is used for transcoding and full decompression.
|
| ++ */
|
| ++{
|
| ++#ifdef IDCT_SCALING_SUPPORTED
|
| ++ int ci;
|
| ++ jpeg_component_info *compptr;
|
| ++
|
| ++ /* Compute actual output image dimensions and DCT scaling choices. */
|
| ++ if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
|
| ++ /* Provide 1/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 1;
|
| ++ cinfo->_min_DCT_v_scaled_size = 1;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
|
| ++ /* Provide 2/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 2;
|
| ++ cinfo->_min_DCT_v_scaled_size = 2;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
|
| ++ /* Provide 3/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 3;
|
| ++ cinfo->_min_DCT_v_scaled_size = 3;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
|
| ++ /* Provide 4/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 4;
|
| ++ cinfo->_min_DCT_v_scaled_size = 4;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
|
| ++ /* Provide 5/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 5;
|
| ++ cinfo->_min_DCT_v_scaled_size = 5;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
|
| ++ /* Provide 6/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 6;
|
| ++ cinfo->_min_DCT_v_scaled_size = 6;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
|
| ++ /* Provide 7/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 7;
|
| ++ cinfo->_min_DCT_v_scaled_size = 7;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
|
| ++ /* Provide 8/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 8;
|
| ++ cinfo->_min_DCT_v_scaled_size = 8;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
|
| ++ /* Provide 9/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 9;
|
| ++ cinfo->_min_DCT_v_scaled_size = 9;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
|
| ++ /* Provide 10/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 10;
|
| ++ cinfo->_min_DCT_v_scaled_size = 10;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
|
| ++ /* Provide 11/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 11;
|
| ++ cinfo->_min_DCT_v_scaled_size = 11;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
|
| ++ /* Provide 12/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 12;
|
| ++ cinfo->_min_DCT_v_scaled_size = 12;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
|
| ++ /* Provide 13/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 13;
|
| ++ cinfo->_min_DCT_v_scaled_size = 13;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
|
| ++ /* Provide 14/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 14;
|
| ++ cinfo->_min_DCT_v_scaled_size = 14;
|
| ++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
|
| ++ /* Provide 15/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 15;
|
| ++ cinfo->_min_DCT_v_scaled_size = 15;
|
| ++ } else {
|
| ++ /* Provide 16/block_size scaling */
|
| ++ cinfo->output_width = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE);
|
| ++ cinfo->output_height = (JDIMENSION)
|
| ++ jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE);
|
| ++ cinfo->_min_DCT_h_scaled_size = 16;
|
| ++ cinfo->_min_DCT_v_scaled_size = 16;
|
| ++ }
|
| ++
|
| ++ /* Recompute dimensions of components */
|
| ++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| ++ ci++, compptr++) {
|
| ++ compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
|
| ++ compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
|
| ++ }
|
| ++
|
| ++#else /* !IDCT_SCALING_SUPPORTED */
|
| ++
|
| ++ /* Hardwire it to "no scaling" */
|
| ++ cinfo->output_width = cinfo->image_width;
|
| ++ cinfo->output_height = cinfo->image_height;
|
| ++ /* jdinput.c has already initialized DCT_scaled_size,
|
| ++ * and has computed unscaled downsampled_width and downsampled_height.
|
| ++ */
|
| ++
|
| ++#endif /* IDCT_SCALING_SUPPORTED */
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Compute output image dimensions and related values.
|
| ++ * NOTE: this is exported for possible use by application.
|
| ++ * Hence it mustn't do anything that can't be done twice.
|
| + * Also note that it may be called before the master module is initialized!
|
| + */
|
| +
|
| +@@ -100,52 +278,31 @@
|
| + if (cinfo->global_state != DSTATE_READY)
|
| + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
| +
|
| ++ /* Compute core output image dimensions and DCT scaling choices. */
|
| ++ jpeg_core_output_dimensions(cinfo);
|
| ++
|
| + #ifdef IDCT_SCALING_SUPPORTED
|
| +
|
| +- /* Compute actual output image dimensions and DCT scaling choices. */
|
| +- if (cinfo->scale_num * 8 <= cinfo->scale_denom) {
|
| +- /* Provide 1/8 scaling */
|
| +- cinfo->output_width = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_width, 8L);
|
| +- cinfo->output_height = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height, 8L);
|
| +- cinfo->min_DCT_scaled_size = 1;
|
| +- } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
|
| +- /* Provide 1/4 scaling */
|
| +- cinfo->output_width = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_width, 4L);
|
| +- cinfo->output_height = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height, 4L);
|
| +- cinfo->min_DCT_scaled_size = 2;
|
| +- } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
|
| +- /* Provide 1/2 scaling */
|
| +- cinfo->output_width = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_width, 2L);
|
| +- cinfo->output_height = (JDIMENSION)
|
| +- jdiv_round_up((long) cinfo->image_height, 2L);
|
| +- cinfo->min_DCT_scaled_size = 4;
|
| +- } else {
|
| +- /* Provide 1/1 scaling */
|
| +- cinfo->output_width = cinfo->image_width;
|
| +- cinfo->output_height = cinfo->image_height;
|
| +- cinfo->min_DCT_scaled_size = DCTSIZE;
|
| +- }
|
| + /* In selecting the actual DCT scaling for each component, we try to
|
| + * scale up the chroma components via IDCT scaling rather than upsampling.
|
| + * This saves time if the upsampler gets to use 1:1 scaling.
|
| +- * Note this code assumes that the supported DCT scalings are powers of 2.
|
| ++ * Note this code adapts subsampling ratios which are powers of 2.
|
| + */
|
| + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
| + ci++, compptr++) {
|
| +- int ssize = cinfo->min_DCT_scaled_size;
|
| ++ int ssize = cinfo->_min_DCT_scaled_size;
|
| + while (ssize < DCTSIZE &&
|
| +- (compptr->h_samp_factor * ssize * 2 <=
|
| +- cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
|
| +- (compptr->v_samp_factor * ssize * 2 <=
|
| +- cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
|
| ++ ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
|
| ++ (compptr->h_samp_factor * ssize * 2) == 0) &&
|
| ++ ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
|
| ++ (compptr->v_samp_factor * ssize * 2) == 0)) {
|
| + ssize = ssize * 2;
|
| + }
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
|
| ++#else
|
| + compptr->DCT_scaled_size = ssize;
|
| ++#endif
|
| + }
|
| +
|
| + /* Recompute downsampled dimensions of components;
|
| +@@ -156,11 +313,11 @@
|
| + /* Size in samples, after IDCT scaling */
|
| + compptr->downsampled_width = (JDIMENSION)
|
| + jdiv_round_up((long) cinfo->image_width *
|
| +- (long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
|
| ++ (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size),
|
| + (long) (cinfo->max_h_samp_factor * DCTSIZE));
|
| + compptr->downsampled_height = (JDIMENSION)
|
| + jdiv_round_up((long) cinfo->image_height *
|
| +- (long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
|
| ++ (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size),
|
| + (long) (cinfo->max_v_samp_factor * DCTSIZE));
|
| + }
|
| +
|
| +@@ -188,6 +345,10 @@
|
| + case JCS_EXT_BGRX:
|
| + case JCS_EXT_XBGR:
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_RGBA:
|
| ++ case JCS_EXT_BGRA:
|
| ++ case JCS_EXT_ABGR:
|
| ++ case JCS_EXT_ARGB:
|
| + cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
|
| + break;
|
| + case JCS_YCbCr:
|
| +@@ -384,7 +545,11 @@
|
| + jinit_inverse_dct(cinfo);
|
| + /* Entropy decoding: either Huffman or arithmetic coding. */
|
| + if (cinfo->arith_code) {
|
| ++#ifdef D_ARITH_CODING_SUPPORTED
|
| ++ jinit_arith_decoder(cinfo);
|
| ++#else
|
| + ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
| ++#endif
|
| + } else {
|
| + if (cinfo->progressive_mode) {
|
| + #ifdef D_PROGRESSIVE_SUPPORTED
|
| +Index: jdmerge.c
|
| +===================================================================
|
| +--- jdmerge.c (revision 829)
|
| ++++ jdmerge.c (working copy)
|
| +@@ -1,10 +1,11 @@
|
| + /*
|
| + * jdmerge.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2009, 2011, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains code for merged upsampling/color conversion.
|
| +@@ -38,6 +39,7 @@
|
| + #include "jinclude.h"
|
| + #include "jpeglib.h"
|
| + #include "jsimd.h"
|
| ++#include "config.h"
|
| +
|
| + #ifdef UPSAMPLE_MERGING_SUPPORTED
|
| +
|
| +@@ -77,6 +79,107 @@
|
| + #define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
|
| +
|
| +
|
| ++/* Include inline routines for colorspace extensions */
|
| ++
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++
|
| ++#define RGB_RED EXT_RGB_RED
|
| ++#define RGB_GREEN EXT_RGB_GREEN
|
| ++#define RGB_BLUE EXT_RGB_BLUE
|
| ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| ++#define h2v1_merged_upsample_internal extrgb_h2v1_merged_upsample_internal
|
| ++#define h2v2_merged_upsample_internal extrgb_h2v2_merged_upsample_internal
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef h2v1_merged_upsample_internal
|
| ++#undef h2v2_merged_upsample_internal
|
| ++
|
| ++#define RGB_RED EXT_RGBX_RED
|
| ++#define RGB_GREEN EXT_RGBX_GREEN
|
| ++#define RGB_BLUE EXT_RGBX_BLUE
|
| ++#define RGB_ALPHA 3
|
| ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| ++#define h2v1_merged_upsample_internal extrgbx_h2v1_merged_upsample_internal
|
| ++#define h2v2_merged_upsample_internal extrgbx_h2v2_merged_upsample_internal
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef h2v1_merged_upsample_internal
|
| ++#undef h2v2_merged_upsample_internal
|
| ++
|
| ++#define RGB_RED EXT_BGR_RED
|
| ++#define RGB_GREEN EXT_BGR_GREEN
|
| ++#define RGB_BLUE EXT_BGR_BLUE
|
| ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| ++#define h2v1_merged_upsample_internal extbgr_h2v1_merged_upsample_internal
|
| ++#define h2v2_merged_upsample_internal extbgr_h2v2_merged_upsample_internal
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef h2v1_merged_upsample_internal
|
| ++#undef h2v2_merged_upsample_internal
|
| ++
|
| ++#define RGB_RED EXT_BGRX_RED
|
| ++#define RGB_GREEN EXT_BGRX_GREEN
|
| ++#define RGB_BLUE EXT_BGRX_BLUE
|
| ++#define RGB_ALPHA 3
|
| ++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| ++#define h2v1_merged_upsample_internal extbgrx_h2v1_merged_upsample_internal
|
| ++#define h2v2_merged_upsample_internal extbgrx_h2v2_merged_upsample_internal
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef h2v1_merged_upsample_internal
|
| ++#undef h2v2_merged_upsample_internal
|
| ++
|
| ++#define RGB_RED EXT_XBGR_RED
|
| ++#define RGB_GREEN EXT_XBGR_GREEN
|
| ++#define RGB_BLUE EXT_XBGR_BLUE
|
| ++#define RGB_ALPHA 0
|
| ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| ++#define h2v1_merged_upsample_internal extxbgr_h2v1_merged_upsample_internal
|
| ++#define h2v2_merged_upsample_internal extxbgr_h2v2_merged_upsample_internal
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef h2v1_merged_upsample_internal
|
| ++#undef h2v2_merged_upsample_internal
|
| ++
|
| ++#define RGB_RED EXT_XRGB_RED
|
| ++#define RGB_GREEN EXT_XRGB_GREEN
|
| ++#define RGB_BLUE EXT_XRGB_BLUE
|
| ++#define RGB_ALPHA 0
|
| ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| ++#define h2v1_merged_upsample_internal extxrgb_h2v1_merged_upsample_internal
|
| ++#define h2v2_merged_upsample_internal extxrgb_h2v2_merged_upsample_internal
|
| ++#include "jdmrgext.c"
|
| ++#undef RGB_RED
|
| ++#undef RGB_GREEN
|
| ++#undef RGB_BLUE
|
| ++#undef RGB_ALPHA
|
| ++#undef RGB_PIXELSIZE
|
| ++#undef h2v1_merged_upsample_internal
|
| ++#undef h2v2_merged_upsample_internal
|
| ++
|
| ++
|
| + /*
|
| + * Initialize tables for YCC->RGB colorspace conversion.
|
| + * This is taken directly from jdcolor.c; see that file for more info.
|
| +@@ -230,56 +333,40 @@
|
| + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
|
| + JSAMPARRAY output_buf)
|
| + {
|
| +- my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
|
| +- register int y, cred, cgreen, cblue;
|
| +- int cb, cr;
|
| +- register JSAMPROW outptr;
|
| +- JSAMPROW inptr0, inptr1, inptr2;
|
| +- JDIMENSION col;
|
| +- /* copy these pointers into registers if possible */
|
| +- register JSAMPLE * range_limit = cinfo->sample_range_limit;
|
| +- int * Crrtab = upsample->Cr_r_tab;
|
| +- int * Cbbtab = upsample->Cb_b_tab;
|
| +- INT32 * Crgtab = upsample->Cr_g_tab;
|
| +- INT32 * Cbgtab = upsample->Cb_g_tab;
|
| +- SHIFT_TEMPS
|
| +-
|
| +- inptr0 = input_buf[0][in_row_group_ctr];
|
| +- inptr1 = input_buf[1][in_row_group_ctr];
|
| +- inptr2 = input_buf[2][in_row_group_ctr];
|
| +- outptr = output_buf[0];
|
| +- /* Loop for each pair of output pixels */
|
| +- for (col = cinfo->output_width >> 1; col > 0; col--) {
|
| +- /* Do the chroma part of the calculation */
|
| +- cb = GETJSAMPLE(*inptr1++);
|
| +- cr = GETJSAMPLE(*inptr2++);
|
| +- cred = Crrtab[cr];
|
| +- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
| +- cblue = Cbbtab[cb];
|
| +- /* Fetch 2 Y values and emit 2 pixels */
|
| +- y = GETJSAMPLE(*inptr0++);
|
| +- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- outptr += rgb_pixelsize[cinfo->out_color_space];
|
| +- y = GETJSAMPLE(*inptr0++);
|
| +- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- outptr += rgb_pixelsize[cinfo->out_color_space];
|
| ++ switch (cinfo->out_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ default:
|
| ++ h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| + }
|
| +- /* If image width is odd, do the last output column separately */
|
| +- if (cinfo->output_width & 1) {
|
| +- cb = GETJSAMPLE(*inptr1);
|
| +- cr = GETJSAMPLE(*inptr2);
|
| +- cred = Crrtab[cr];
|
| +- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
| +- cblue = Cbbtab[cb];
|
| +- y = GETJSAMPLE(*inptr0);
|
| +- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- }
|
| + }
|
| +
|
| +
|
| +@@ -292,72 +379,40 @@
|
| + JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
|
| + JSAMPARRAY output_buf)
|
| + {
|
| +- my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
|
| +- register int y, cred, cgreen, cblue;
|
| +- int cb, cr;
|
| +- register JSAMPROW outptr0, outptr1;
|
| +- JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
| +- JDIMENSION col;
|
| +- /* copy these pointers into registers if possible */
|
| +- register JSAMPLE * range_limit = cinfo->sample_range_limit;
|
| +- int * Crrtab = upsample->Cr_r_tab;
|
| +- int * Cbbtab = upsample->Cb_b_tab;
|
| +- INT32 * Crgtab = upsample->Cr_g_tab;
|
| +- INT32 * Cbgtab = upsample->Cb_g_tab;
|
| +- SHIFT_TEMPS
|
| +-
|
| +- inptr00 = input_buf[0][in_row_group_ctr*2];
|
| +- inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
|
| +- inptr1 = input_buf[1][in_row_group_ctr];
|
| +- inptr2 = input_buf[2][in_row_group_ctr];
|
| +- outptr0 = output_buf[0];
|
| +- outptr1 = output_buf[1];
|
| +- /* Loop for each group of output pixels */
|
| +- for (col = cinfo->output_width >> 1; col > 0; col--) {
|
| +- /* Do the chroma part of the calculation */
|
| +- cb = GETJSAMPLE(*inptr1++);
|
| +- cr = GETJSAMPLE(*inptr2++);
|
| +- cred = Crrtab[cr];
|
| +- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
| +- cblue = Cbbtab[cb];
|
| +- /* Fetch 4 Y values and emit 4 pixels */
|
| +- y = GETJSAMPLE(*inptr00++);
|
| +- outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- outptr0 += RGB_PIXELSIZE;
|
| +- y = GETJSAMPLE(*inptr00++);
|
| +- outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- outptr0 += RGB_PIXELSIZE;
|
| +- y = GETJSAMPLE(*inptr01++);
|
| +- outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- outptr1 += RGB_PIXELSIZE;
|
| +- y = GETJSAMPLE(*inptr01++);
|
| +- outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- outptr1 += RGB_PIXELSIZE;
|
| ++ switch (cinfo->out_color_space) {
|
| ++ case JCS_EXT_RGB:
|
| ++ extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| ++ default:
|
| ++ h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
| ++ output_buf);
|
| ++ break;
|
| + }
|
| +- /* If image width is odd, do the last output column separately */
|
| +- if (cinfo->output_width & 1) {
|
| +- cb = GETJSAMPLE(*inptr1);
|
| +- cr = GETJSAMPLE(*inptr2);
|
| +- cred = Crrtab[cr];
|
| +- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
| +- cblue = Cbbtab[cb];
|
| +- y = GETJSAMPLE(*inptr00);
|
| +- outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- y = GETJSAMPLE(*inptr01);
|
| +- outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
| +- outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
| +- outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
| +- }
|
| + }
|
| +
|
| +
|
| +Index: jdphuff.c
|
| +===================================================================
|
| +--- jdphuff.c (revision 829)
|
| ++++ jdphuff.c (working copy)
|
| +@@ -198,6 +198,7 @@
|
| + * On some machines, a shift and add will be faster than a table lookup.
|
| + */
|
| +
|
| ++#define AVOID_TABLES
|
| + #ifdef AVOID_TABLES
|
| +
|
| + #define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
|
| +Index: jdsample.c
|
| +===================================================================
|
| +--- jdsample.c (revision 829)
|
| ++++ jdsample.c (working copy)
|
| +@@ -1,9 +1,11 @@
|
| + /*
|
| + * jdsample.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1996, Thomas G. Lane.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains upsampling routines.
|
| +@@ -19,50 +21,12 @@
|
| + * Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
|
| + */
|
| +
|
| +-#define JPEG_INTERNALS
|
| +-#include "jinclude.h"
|
| +-#include "jpeglib.h"
|
| ++#include "jdsample.h"
|
| + #include "jsimd.h"
|
| ++#include "jpegcomp.h"
|
| +
|
| +
|
| +-/* Pointer to routine to upsample a single component */
|
| +-typedef JMETHOD(void, upsample1_ptr,
|
| +- (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| +- JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
| +
|
| +-/* Private subobject */
|
| +-
|
| +-typedef struct {
|
| +- struct jpeg_upsampler pub; /* public fields */
|
| +-
|
| +- /* Color conversion buffer. When using separate upsampling and color
|
| +- * conversion steps, this buffer holds one upsampled row group until it
|
| +- * has been color converted and output.
|
| +- * Note: we do not allocate any storage for component(s) which are full-size,
|
| +- * ie do not need rescaling. The corresponding entry of color_buf[] is
|
| +- * simply set to point to the input data array, thereby avoiding copying.
|
| +- */
|
| +- JSAMPARRAY color_buf[MAX_COMPONENTS];
|
| +-
|
| +- /* Per-component upsampling method pointers */
|
| +- upsample1_ptr methods[MAX_COMPONENTS];
|
| +-
|
| +- int next_row_out; /* counts rows emitted from color_buf */
|
| +- JDIMENSION rows_to_go; /* counts rows remaining in image */
|
| +-
|
| +- /* Height of an input row group for each component. */
|
| +- int rowgroup_height[MAX_COMPONENTS];
|
| +-
|
| +- /* These arrays save pixel expansion factors so that int_expand need not
|
| +- * recompute them each time. They are unused for other upsampling methods.
|
| +- */
|
| +- UINT8 h_expand[MAX_COMPONENTS];
|
| +- UINT8 v_expand[MAX_COMPONENTS];
|
| +-} my_upsampler;
|
| +-
|
| +-typedef my_upsampler * my_upsample_ptr;
|
| +-
|
| +-
|
| + /*
|
| + * Initialize for an upsampling pass.
|
| + */
|
| +@@ -420,7 +384,7 @@
|
| + /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
|
| + * so don't ask for it.
|
| + */
|
| +- do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
|
| ++ do_fancy = cinfo->do_fancy_upsampling && cinfo->_min_DCT_scaled_size > 1;
|
| +
|
| + /* Verify we can handle the sampling factors, select per-component methods,
|
| + * and create storage as needed.
|
| +@@ -430,10 +394,10 @@
|
| + /* Compute size of an "input group" after IDCT scaling. This many samples
|
| + * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
|
| + */
|
| +- h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
|
| +- cinfo->min_DCT_scaled_size;
|
| +- v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
|
| +- cinfo->min_DCT_scaled_size;
|
| ++ h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) /
|
| ++ cinfo->_min_DCT_scaled_size;
|
| ++ v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
| ++ cinfo->_min_DCT_scaled_size;
|
| + h_out_group = cinfo->max_h_samp_factor;
|
| + v_out_group = cinfo->max_v_samp_factor;
|
| + upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
|
| +Index: jdtrans.c
|
| +===================================================================
|
| +--- jdtrans.c (revision 829)
|
| ++++ jdtrans.c (working copy)
|
| +@@ -99,9 +99,18 @@
|
| + /* This is effectively a buffered-image operation. */
|
| + cinfo->buffered_image = TRUE;
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++ /* Compute output image dimensions and related values. */
|
| ++ jpeg_core_output_dimensions(cinfo);
|
| ++#endif
|
| ++
|
| + /* Entropy decoding: either Huffman or arithmetic coding. */
|
| + if (cinfo->arith_code) {
|
| ++#ifdef D_ARITH_CODING_SUPPORTED
|
| ++ jinit_arith_decoder(cinfo);
|
| ++#else
|
| + ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
| ++#endif
|
| + } else {
|
| + if (cinfo->progressive_mode) {
|
| + #ifdef D_PROGRESSIVE_SUPPORTED
|
| +Index: jerror.h
|
| +===================================================================
|
| +--- jerror.h (revision 829)
|
| ++++ jerror.h (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * jerror.h
|
| + *
|
| + * Copyright (C) 1994-1997, Thomas G. Lane.
|
| ++ * Modified 1997-2009 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -39,14 +40,23 @@
|
| + JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
|
| +
|
| + /* For maintenance convenience, list is alphabetical by message code name */
|
| ++#if JPEG_LIB_VERSION < 70
|
| + JMESSAGE(JERR_ARITH_NOTIMPL,
|
| +- "Sorry, there are legal restrictions on arithmetic coding")
|
| ++ "Sorry, arithmetic coding is not implemented")
|
| ++#endif
|
| + JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
|
| + JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
|
| + JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
|
| + JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
|
| ++#endif
|
| + JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
|
| + JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++JMESSAGE(JERR_BAD_DROP_SAMPLING,
|
| ++ "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
|
| ++#endif
|
| + JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
|
| + JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
|
| + JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
|
| +@@ -93,6 +103,9 @@
|
| + JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
|
| + JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
|
| + JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
|
| ++#endif
|
| + JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
|
| + JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
|
| + JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
|
| +@@ -170,6 +183,9 @@
|
| + JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
|
| + JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
|
| + JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
|
| ++#endif
|
| + JMESSAGE(JWRN_BOGUS_PROGRESSION,
|
| + "Inconsistent progression sequence for component %d coefficient %d")
|
| + JMESSAGE(JWRN_EXTRANEOUS_DATA,
|
| +@@ -182,6 +198,13 @@
|
| + "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
|
| + JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
|
| + JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
|
| ++#if JPEG_LIB_VERSION < 70
|
| ++JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
|
| ++#if defined(C_ARITH_CODING_SUPPORTED) || defined(D_ARITH_CODING_SUPPORTED)
|
| ++JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
|
| ++JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
|
| ++#endif
|
| ++#endif
|
| +
|
| + #ifdef JMAKE_ENUM_LIST
|
| +
|
| +Index: jidctint.c
|
| +===================================================================
|
| +--- jidctint.c (revision 829)
|
| ++++ jidctint.c (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * jidctint.c
|
| + *
|
| + * Copyright (C) 1991-1998, Thomas G. Lane.
|
| ++ * Modification developed 2002-2009 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -23,6 +24,27 @@
|
| + * The advantage of this method is that no data path contains more than one
|
| + * multiplication; this allows a very simple and accurate implementation in
|
| + * scaled fixed-point arithmetic, with a minimal number of shifts.
|
| ++ *
|
| ++ * We also provide IDCT routines with various output sample block sizes for
|
| ++ * direct resolution reduction or enlargement without additional resampling:
|
| ++ * NxN (N=1...16) pixels for one 8x8 input DCT block.
|
| ++ *
|
| ++ * For N<8 we simply take the corresponding low-frequency coefficients of
|
| ++ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
|
| ++ * to yield the downscaled outputs.
|
| ++ * This can be seen as direct low-pass downsampling from the DCT domain
|
| ++ * point of view rather than the usual spatial domain point of view,
|
| ++ * yielding significant computational savings and results at least
|
| ++ * as good as common bilinear (averaging) spatial downsampling.
|
| ++ *
|
| ++ * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
|
| ++ * lower frequencies and higher frequencies assumed to be zero.
|
| ++ * It turns out that the computational effort is similar to the 8x8 IDCT
|
| ++ * regarding the output size.
|
| ++ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
|
| ++ *
|
| ++ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
|
| ++ * since there would be too many additional constants to pre-calculate.
|
| + */
|
| +
|
| + #define JPEG_INTERNALS
|
| +@@ -38,7 +60,7 @@
|
| + */
|
| +
|
| + #if DCTSIZE != 8
|
| +- Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
|
| ++ Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
|
| + #endif
|
| +
|
| +
|
| +@@ -386,4 +408,2216 @@
|
| + }
|
| + }
|
| +
|
| ++#ifdef IDCT_SCALING_SUPPORTED
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 7x7 output block.
|
| ++ *
|
| ++ * Optimized algorithm with 12 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/14).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
|
| ++ INT32 z1, z2, z3;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[7*7]; /* buffers data between passes */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part */
|
| ++
|
| ++ tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp13 <<= CONST_BITS;
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
|
| ++ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
|
| ++ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
|
| ++ tmp0 = z1 + z3;
|
| ++ z2 -= tmp0;
|
| ++ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
|
| ++ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
|
| ++ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
|
| ++ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++
|
| ++ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
|
| ++ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
|
| ++ tmp0 = tmp1 - tmp2;
|
| ++ tmp1 += tmp2;
|
| ++ tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
|
| ++ tmp1 += tmp2;
|
| ++ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
|
| ++ tmp0 += z2;
|
| ++ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 7 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 7; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp13 <<= CONST_BITS;
|
| ++
|
| ++ z1 = (INT32) wsptr[2];
|
| ++ z2 = (INT32) wsptr[4];
|
| ++ z3 = (INT32) wsptr[6];
|
| ++
|
| ++ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
|
| ++ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
|
| ++ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
|
| ++ tmp0 = z1 + z3;
|
| ++ z2 -= tmp0;
|
| ++ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
|
| ++ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
|
| ++ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
|
| ++ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++
|
| ++ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
|
| ++ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
|
| ++ tmp0 = tmp1 - tmp2;
|
| ++ tmp1 += tmp2;
|
| ++ tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
|
| ++ tmp1 += tmp2;
|
| ++ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
|
| ++ tmp0 += z2;
|
| ++ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 7; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a reduced-size 6x6 output block.
|
| ++ * Only coefficient rows/columns 0-5 of coef_block are read (each through DEQUANTIZE with the matching compptr->dct_table entry); rows output_buf[0..5] receive 6 samples each, starting at output_col.
|
| ++ * Optimized algorithm with 3 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/12).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
|
| ++ INT32 z1, z2, z3;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[6*6]; /* buffers data between passes: 6 rows of 6 entries, written column-wise by pass 1 and read row-wise by pass 2 */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array (one column of 6 results per iteration). */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part: input rows 0, 2 and 4 */
|
| ++
|
| ++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp0 <<= CONST_BITS; /* NOTE(review): undefined behavior in ISO C if tmp0 is negative here -- confirm */
|
| ++ /* Add fudge factor here for final descale: half of the 2**(CONST_BITS-PASS1_BITS) divisor used below, assuming ONE is 1. */
|
| ++ tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++ tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
|
| ++ tmp1 = tmp0 + tmp10;
|
| ++ tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); /* descaled early; stored without a further shift below */
|
| ++ tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
|
| ++ tmp10 = tmp1 + tmp0;
|
| ++ tmp12 = tmp1 - tmp0;
|
| ++
|
| ++ /* Odd part: input rows 1, 3 and 5 */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
|
| ++ tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
|
| ++ tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
|
| ++ tmp1 = (z1 - z2 - z3) << PASS1_BITS; /* shifted by PASS1_BITS only, to pair with the already-descaled tmp11 */
|
| ++
|
| ++ /* Final output stage: sums fill work rows 0-2, differences fill the mirrored rows 5-3 */
|
| ++
|
| ++ wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[6*1] = (int) (tmp11 + tmp1);
|
| ++ wsptr[6*4] = (int) (tmp11 - tmp1);
|
| ++ wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 6 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 6; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part: work-array entries 0, 2 and 4 of this row */
|
| ++
|
| ++ /* Add fudge factor here for final descale: once shifted by CONST_BITS it is half of the 2**(CONST_BITS+PASS1_BITS+3) divisor, assuming ONE is 1. */
|
| ++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp0 <<= CONST_BITS;
|
| ++ tmp2 = (INT32) wsptr[4];
|
| ++ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
|
| ++ tmp1 = tmp0 + tmp10;
|
| ++ tmp11 = tmp0 - tmp10 - tmp10;
|
| ++ tmp10 = (INT32) wsptr[2];
|
| ++ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
|
| ++ tmp10 = tmp1 + tmp0;
|
| ++ tmp12 = tmp1 - tmp0;
|
| ++
|
| ++ /* Odd part: work-array entries 1, 3 and 5 of this row */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
|
| ++ tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
|
| ++ tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
|
| ++ tmp1 = (z1 - z2 - z3) << CONST_BITS;
|
| ++
|
| ++ /* Final output stage: descale, mask with RANGE_MASK, and look the result up in range_limit to get each sample */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 6; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a reduced-size 5x5 output block.
|
| ++ * Only coefficient rows/columns 0-4 of coef_block are read (each through DEQUANTIZE with the matching compptr->dct_table entry); rows output_buf[0..4] receive 5 samples each, starting at output_col.
|
| ++ * Optimized algorithm with 5 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/10).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
|
| ++ INT32 z1, z2, z3;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[5*5]; /* buffers data between passes: 5 rows of 5 entries, written column-wise by pass 1 and read row-wise by pass 2 */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array (one column of 5 results per iteration). */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part: input rows 0, 2 and 4 */
|
| ++
|
| ++ tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp12 <<= CONST_BITS; /* NOTE(review): undefined behavior in ISO C if tmp12 is negative here -- confirm */
|
| ++ /* Add fudge factor here for final descale: half of the 2**(CONST_BITS-PASS1_BITS) divisor used below, assuming ONE is 1. */
|
| ++ tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
|
| ++ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
|
| ++ z3 = tmp12 + z2;
|
| ++ tmp10 = z3 + z1;
|
| ++ tmp11 = z3 - z1;
|
| ++ tmp12 -= z2 << 2; /* tmp12 is now the centre result, stored to work row 2 below */
|
| ++
|
| ++ /* Odd part: input rows 1 and 3 */
|
| ++
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++
|
| ++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
|
| ++ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
|
| ++ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
|
| ++
|
| ++ /* Final output stage: sums fill work rows 0-1, differences fill the mirrored rows 4-3, tmp12 alone fills row 2 */
|
| ++
|
| ++ wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 5 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 5; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part: work-array entries 0, 2 and 4 of this row */
|
| ++
|
| ++ /* Add fudge factor here for final descale: once shifted by CONST_BITS it is half of the 2**(CONST_BITS+PASS1_BITS+3) divisor, assuming ONE is 1. */
|
| ++ tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp12 <<= CONST_BITS;
|
| ++ tmp0 = (INT32) wsptr[2];
|
| ++ tmp1 = (INT32) wsptr[4];
|
| ++ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
|
| ++ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
|
| ++ z3 = tmp12 + z2;
|
| ++ tmp10 = z3 + z1;
|
| ++ tmp11 = z3 - z1;
|
| ++ tmp12 -= z2 << 2;
|
| ++
|
| ++ /* Odd part: work-array entries 1 and 3 of this row */
|
| ++
|
| ++ z2 = (INT32) wsptr[1];
|
| ++ z3 = (INT32) wsptr[3];
|
| ++
|
| ++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
|
| ++ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
|
| ++ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
|
| ++
|
| ++ /* Final output stage: descale, mask with RANGE_MASK, and look the result up in range_limit to get each sample */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 5; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a reduced-size 3x3 output block.
|
| ++ * Only coefficient rows/columns 0-2 of coef_block are read (each through DEQUANTIZE with the matching compptr->dct_table entry); rows output_buf[0..2] receive 3 samples each, starting at output_col.
|
| ++ * Optimized algorithm with 2 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/6).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp0, tmp2, tmp10, tmp12;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[3*3]; /* buffers data between passes: 3 rows of 3 entries, written column-wise by pass 1 and read row-wise by pass 2 */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array (one column of 3 results per iteration). */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part: input rows 0 and 2 */
|
| ++
|
| ++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp0 <<= CONST_BITS; /* NOTE(review): undefined behavior in ISO C if tmp0 is negative here -- confirm */
|
| ++ /* Add fudge factor here for final descale: half of the 2**(CONST_BITS-PASS1_BITS) divisor used below, assuming ONE is 1. */
|
| ++ tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++ tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
|
| ++ tmp10 = tmp0 + tmp12;
|
| ++ tmp2 = tmp0 - tmp12 - tmp12;
|
| ++
|
| ++ /* Odd part: input row 1 */
|
| ++
|
| ++ tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
|
| ++
|
| ++ /* Final output stage: the sum fills work row 0, the difference fills row 2, tmp2 alone fills row 1 */
|
| ++
|
| ++ wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 3 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 3; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part: work-array entries 0 and 2 of this row */
|
| ++
|
| ++ /* Add fudge factor here for final descale: once shifted by CONST_BITS it is half of the 2**(CONST_BITS+PASS1_BITS+3) divisor, assuming ONE is 1. */
|
| ++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp0 <<= CONST_BITS;
|
| ++ tmp2 = (INT32) wsptr[2];
|
| ++ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
|
| ++ tmp10 = tmp0 + tmp12;
|
| ++ tmp2 = tmp0 - tmp12 - tmp12;
|
| ++
|
| ++ /* Odd part: work-array entry 1 of this row */
|
| ++
|
| ++ tmp12 = (INT32) wsptr[1];
|
| ++ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
|
| ++
|
| ++ /* Final output stage: descale, mask with RANGE_MASK, and look the result up in range_limit to get each sample */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 3; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 9x9 output block.
|
| ++ * Coefficient rows/columns 0-7 of coef_block are read (each through DEQUANTIZE with the matching compptr->dct_table entry); rows output_buf[0..8] receive 9 samples each, starting at output_col.
|
| ++ * Optimized algorithm with 10 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/18).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
|
| ++ INT32 z1, z2, z3, z4;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[8*9]; /* buffers data between passes: 9 rows of 8 entries, written column-wise by pass 1 and read row-wise by pass 2 */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array (each of the 8 input columns yields a column of 9 results). */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part: input rows 0, 2, 4 and 6 */
|
| ++
|
| ++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp0 <<= CONST_BITS; /* NOTE(review): undefined behavior in ISO C if tmp0 is negative here -- confirm */
|
| ++ /* Add fudge factor here for final descale: half of the 2**(CONST_BITS-PASS1_BITS) divisor used below, assuming ONE is 1. */
|
| ++ tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
|
| ++ tmp1 = tmp0 + tmp3;
|
| ++ tmp2 = tmp0 - tmp3 - tmp3;
|
| ++
|
| ++ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
|
| ++ tmp11 = tmp2 + tmp0;
|
| ++ tmp14 = tmp2 - tmp0 - tmp0;
|
| ++
|
| ++ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
|
| ++ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
|
| ++ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
|
| ++
|
| ++ tmp10 = tmp1 + tmp0 - tmp3;
|
| ++ tmp12 = tmp1 - tmp0 + tmp2;
|
| ++ tmp13 = tmp1 - tmp2 + tmp3;
|
| ++
|
| ++ /* Odd part: input rows 1, 3, 5 and 7 */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
|
| ++
|
| ++ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
|
| ++ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
|
| ++ tmp0 = tmp2 + tmp3 - z2;
|
| ++ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
|
| ++ tmp2 += z2 - tmp1;
|
| ++ tmp3 += z2 + tmp1;
|
| ++ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
|
| ++
|
| ++ /* Final output stage: sums fill work rows 0-3, differences fill the mirrored rows 8-5, tmp14 alone fills row 4 */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 9 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 9; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part: work-array entries 0, 2, 4 and 6 of this row */
|
| ++
|
| ++ /* Add fudge factor here for final descale: once shifted by CONST_BITS it is half of the 2**(CONST_BITS+PASS1_BITS+3) divisor, assuming ONE is 1. */
|
| ++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp0 <<= CONST_BITS;
|
| ++
|
| ++ z1 = (INT32) wsptr[2];
|
| ++ z2 = (INT32) wsptr[4];
|
| ++ z3 = (INT32) wsptr[6];
|
| ++
|
| ++ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
|
| ++ tmp1 = tmp0 + tmp3;
|
| ++ tmp2 = tmp0 - tmp3 - tmp3;
|
| ++
|
| ++ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
|
| ++ tmp11 = tmp2 + tmp0;
|
| ++ tmp14 = tmp2 - tmp0 - tmp0;
|
| ++
|
| ++ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
|
| ++ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
|
| ++ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
|
| ++
|
| ++ tmp10 = tmp1 + tmp0 - tmp3;
|
| ++ tmp12 = tmp1 - tmp0 + tmp2;
|
| ++ tmp13 = tmp1 - tmp2 + tmp3;
|
| ++
|
| ++ /* Odd part: work-array entries 1, 3, 5 and 7 of this row */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z4 = (INT32) wsptr[7];
|
| ++
|
| ++ z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
|
| ++
|
| ++ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
|
| ++ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
|
| ++ tmp0 = tmp2 + tmp3 - z2;
|
| ++ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
|
| ++ tmp2 += z2 - tmp1;
|
| ++ tmp3 += z2 + tmp1;
|
| ++ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
|
| ++
|
| ++ /* Final output stage: descale, mask with RANGE_MASK, and look the result up in range_limit to get each sample */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 10x10 output block.
|
| ++ * Coefficient rows/columns 0-7 of coef_block are read (each through DEQUANTIZE with the matching compptr->dct_table entry); rows output_buf[0..9] receive 10 samples each, starting at output_col.
|
| ++ * Optimized algorithm with 12 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/20).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
|
| ++ INT32 z1, z2, z3, z4, z5;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[8*10]; /* buffers data between passes: 10 rows of 8 entries, written column-wise by pass 1 and read row-wise by pass 2 */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array (each of the 8 input columns yields a column of 10 results). */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part: input rows 0, 2, 4 and 6 */
|
| ++
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ z3 <<= CONST_BITS; /* NOTE(review): undefined behavior in ISO C if z3 is negative here -- confirm */
|
| ++ /* Add fudge factor here for final descale: half of the 2**(CONST_BITS-PASS1_BITS) divisor used below, assuming ONE is 1. */
|
| ++ z3 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
|
| ++ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
|
| ++ tmp10 = z3 + z1;
|
| ++ tmp11 = z3 - z2;
|
| ++
|
| ++ tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
|
| ++ CONST_BITS-PASS1_BITS); /* descaled early; stored without a further shift below */
|
| ++
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
|
| ++ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
|
| ++ tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
|
| ++
|
| ++ tmp20 = tmp10 + tmp12;
|
| ++ tmp24 = tmp10 - tmp12;
|
| ++ tmp21 = tmp11 + tmp13;
|
| ++ tmp23 = tmp11 - tmp13;
|
| ++
|
| ++ /* Odd part: input rows 1, 3, 5 and 7 */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ tmp11 = z2 + z4;
|
| ++ tmp13 = z2 - z4;
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
|
| ++ z5 = z3 << CONST_BITS;
|
| ++
|
| ++ z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
|
| ++ z4 = z5 + tmp12;
|
| ++
|
| ++ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
|
| ++ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
|
| ++
|
| ++ z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
|
| ++ z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
|
| ++
|
| ++ tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; /* shifted by PASS1_BITS only, to pair with the already-descaled tmp22 */
|
| ++
|
| ++ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
|
| ++ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
|
| ++
|
| ++ /* Final output stage: sums fill work rows 0-4, differences fill the mirrored rows 9-5 */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) (tmp22 + tmp12);
|
| ++ wsptr[8*7] = (int) (tmp22 - tmp12);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 10 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 10; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part: work-array entries 0, 2, 4 and 6 of this row */
|
| ++
|
| ++ /* Add fudge factor here for final descale: once shifted by CONST_BITS it is half of the 2**(CONST_BITS+PASS1_BITS+3) divisor, assuming ONE is 1. */
|
| ++ z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ z3 <<= CONST_BITS;
|
| ++ z4 = (INT32) wsptr[4];
|
| ++ z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
|
| ++ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
|
| ++ tmp10 = z3 + z1;
|
| ++ tmp11 = z3 - z2;
|
| ++
|
| ++ tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
|
| ++
|
| ++ z2 = (INT32) wsptr[2];
|
| ++ z3 = (INT32) wsptr[6];
|
| ++
|
| ++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
|
| ++ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
|
| ++ tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
|
| ++
|
| ++ tmp20 = tmp10 + tmp12;
|
| ++ tmp24 = tmp10 - tmp12;
|
| ++ tmp21 = tmp11 + tmp13;
|
| ++ tmp23 = tmp11 - tmp13;
|
| ++
|
| ++ /* Odd part: work-array entries 1, 3, 5 and 7 of this row */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z3 <<= CONST_BITS; /* pre-scaled once; z3 is only added or subtracted below, never multiplied */
|
| ++ z4 = (INT32) wsptr[7];
|
| ++
|
| ++ tmp11 = z2 + z4;
|
| ++ tmp13 = z2 - z4;
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
|
| ++
|
| ++ z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
|
| ++ z4 = z3 + tmp12;
|
| ++
|
| ++ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
|
| ++ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
|
| ++
|
| ++ z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
|
| ++ z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
|
| ++
|
| ++ tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
|
| ++
|
| ++ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
|
| ++ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
|
| ++
|
| ++ /* Final output stage: descale, mask with RANGE_MASK, and look the result up in range_limit to get each sample */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing an 11x11 output block.
|
| ++ *
|
| ++ * Optimized algorithm with 24 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/22).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
|
| ++ INT32 z1, z2, z3, z4;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[8*11]; /* buffers data between passes */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part */
|
| ++
|
| ++ tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp10 <<= CONST_BITS;
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
|
| ++ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
|
| ++ z4 = z1 + z3;
|
| ++ tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
|
| ++ z4 -= z2;
|
| ++ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
|
| ++ tmp21 = tmp20 + tmp23 + tmp25 -
|
| ++ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
|
| ++ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
|
| ++ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
|
| ++ tmp24 += tmp25;
|
| ++ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
|
| ++ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
|
| ++ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
|
| ++ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ tmp11 = z1 + z2;
|
| ++ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
|
| ++ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
|
| ++ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
|
| ++ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
|
| ++ tmp10 = tmp11 + tmp12 + tmp13 -
|
| ++ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
|
| ++ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
|
| ++ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
|
| ++ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
|
| ++ z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
|
| ++ tmp11 += z1;
|
| ++ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
|
| ++ tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
|
| ++ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
|
| ++ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 11 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 11; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp10 <<= CONST_BITS;
|
| ++
|
| ++ z1 = (INT32) wsptr[2];
|
| ++ z2 = (INT32) wsptr[4];
|
| ++ z3 = (INT32) wsptr[6];
|
| ++
|
| ++ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
|
| ++ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
|
| ++ z4 = z1 + z3;
|
| ++ tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
|
| ++ z4 -= z2;
|
| ++ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
|
| ++ tmp21 = tmp20 + tmp23 + tmp25 -
|
| ++ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
|
| ++ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
|
| ++ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
|
| ++ tmp24 += tmp25;
|
| ++ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
|
| ++ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
|
| ++ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
|
| ++ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z4 = (INT32) wsptr[7];
|
| ++
|
| ++ tmp11 = z1 + z2;
|
| ++ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
|
| ++ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
|
| ++ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
|
| ++ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
|
| ++ tmp10 = tmp11 + tmp12 + tmp13 -
|
| ++ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
|
| ++ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
|
| ++ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
|
| ++ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
|
| ++ z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
|
| ++ tmp11 += z1;
|
| ++ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
|
| ++ tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
|
| ++ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
|
| ++ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 12x12 output block (rows output_buf[0..11], samples output_col..output_col+11 of each row).
|
| ++ * Pass 1 turns the 8 input columns into a 12-row by 8-wide int workspace; pass 2 turns each workspace row into 12 output samples.
|
| ++ * Optimized algorithm with 15 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/24).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; /* odd-part results at the output stage; tmp10..tmp12 double as even-part scratch */
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; /* even-part results */
|
| ++ INT32 z1, z2, z3, z4; /* input terms and short-lived scratch */
|
| ++ JCOEFPTR inptr; /* walks the columns of coef_block in pass 1 */
|
| ++ ISLOW_MULT_TYPE * quantptr; /* walks compptr->dct_table in step with inptr */
|
| ++ int * wsptr; /* cursor into workspace */
|
| ++ JSAMPROW outptr; /* current output row, already offset by output_col */
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
|
| ++ int ctr; /* column counter in pass 1, row counter in pass 2 */
|
| ++ int workspace[8*12]; /* buffers data between passes: 12 rows with a stride of 8 ints */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration; all three cursors advance together */
|
| ++ /* Even part */
|
| ++
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); /* DEQUANTIZE is defined elsewhere; presumably coefficient times table entry - confirm */
|
| ++ z3 <<= CONST_BITS; /* NOTE(review): z3 may be negative; left-shifting a negative signed value is undefined in ISO C - confirm */
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z3 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the 2^(CONST_BITS-PASS1_BITS) divisor applied at the end of this pass */
|
| ++
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
|
| ++
|
| ++ tmp10 = z3 + z4;
|
| ++ tmp11 = z3 - z4;
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
|
| ++ z1 <<= CONST_BITS; /* must follow the MULTIPLY above, which needs the unshifted z1 */
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++ z2 <<= CONST_BITS;
|
| ++
|
| ++ tmp12 = z1 - z2; /* tmp12 is reused as scratch three times below; statement order matters */
|
| ++
|
| ++ tmp21 = z3 + tmp12;
|
| ++ tmp24 = z3 - tmp12;
|
| ++
|
| ++ tmp12 = z4 + z2;
|
| ++
|
| ++ tmp20 = tmp10 + tmp12;
|
| ++ tmp25 = tmp10 - tmp12;
|
| ++
|
| ++ tmp12 = z4 - z1 - z2;
|
| ++
|
| ++ tmp22 = tmp11 + tmp12;
|
| ++ tmp23 = tmp11 - tmp12;
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
|
| ++ tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
|
| ++
|
| ++ tmp10 = z1 + z3;
|
| ++ tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
|
| ++ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
|
| ++ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
|
| ++ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
|
| ++ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
|
| ++ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
|
| ++ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
|
| ++ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
|
| ++
|
| ++ z1 -= z4; /* z1..z3 are recycled as scratch from here; tmp11 and tmp14 are recomputed below */
|
| ++ z2 -= z3;
|
| ++ z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
|
| ++ tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
|
| ++ tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
|
| ++
|
| ++ /* Final output stage: workspace row k gets even+odd, row 11-k gets even-odd */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 12 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace; /* rewind to the first workspace row */
|
| ++ for (ctr = 0; ctr < 12; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ z3 <<= CONST_BITS; /* the rounding term becomes ONE << (CONST_BITS+PASS1_BITS+2), half of the final divisor */
|
| ++
|
| ++ z4 = (INT32) wsptr[4];
|
| ++ z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
|
| ++
|
| ++ tmp10 = z3 + z4;
|
| ++ tmp11 = z3 - z4;
|
| ++
|
| ++ z1 = (INT32) wsptr[2];
|
| ++ z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
|
| ++ z1 <<= CONST_BITS;
|
| ++ z2 = (INT32) wsptr[6];
|
| ++ z2 <<= CONST_BITS;
|
| ++
|
| ++ tmp12 = z1 - z2; /* same scratch reuse of tmp12 as in pass 1 */
|
| ++
|
| ++ tmp21 = z3 + tmp12;
|
| ++ tmp24 = z3 - tmp12;
|
| ++
|
| ++ tmp12 = z4 + z2;
|
| ++
|
| ++ tmp20 = tmp10 + tmp12;
|
| ++ tmp25 = tmp10 - tmp12;
|
| ++
|
| ++ tmp12 = z4 - z1 - z2;
|
| ++
|
| ++ tmp22 = tmp11 + tmp12;
|
| ++ tmp23 = tmp11 - tmp12;
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z4 = (INT32) wsptr[7];
|
| ++
|
| ++ tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
|
| ++ tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
|
| ++
|
| ++ tmp10 = z1 + z3;
|
| ++ tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
|
| ++ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
|
| ++ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
|
| ++ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
|
| ++ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
|
| ++ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
|
| ++ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
|
| ++ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
|
| ++
|
| ++ z1 -= z4;
|
| ++ z2 -= z3;
|
| ++ z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
|
| ++ tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
|
| ++ tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
|
| ++
|
| ++ /* Final output stage: sample k gets even+odd, sample 11-k gets even-odd; each is descaled, masked with RANGE_MASK and looked up in range_limit */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 13x13 output block (rows output_buf[0..12], samples output_col..output_col+12 of each row).
|
| ++ * Pass 1 turns the 8 input columns into a 13-row by 8-wide int workspace; pass 2 turns each workspace row into 13 output samples.
|
| ++ * Optimized algorithm with 29 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/26).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; /* odd-part results at the output stage; tmp10..tmp13 double as even-part scratch */
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part results; tmp26 alone feeds the middle output */
|
| ++ INT32 z1, z2, z3, z4; /* input terms and short-lived scratch */
|
| ++ JCOEFPTR inptr; /* walks the columns of coef_block in pass 1 */
|
| ++ ISLOW_MULT_TYPE * quantptr; /* walks compptr->dct_table in step with inptr */
|
| ++ int * wsptr; /* cursor into workspace */
|
| ++ JSAMPROW outptr; /* current output row, already offset by output_col */
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
|
| ++ int ctr; /* column counter in pass 1, row counter in pass 2 */
|
| ++ int workspace[8*13]; /* buffers data between passes: 13 rows with a stride of 8 ints */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration; all three cursors advance together */
|
| ++ /* Even part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); /* DEQUANTIZE is defined elsewhere; presumably coefficient times table entry - confirm */
|
| ++ z1 <<= CONST_BITS; /* NOTE(review): z1 may be negative; left-shifting a negative signed value is undefined in ISO C - confirm */
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z1 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the 2^(CONST_BITS-PASS1_BITS) divisor applied at the end of this pass */
|
| ++
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ tmp10 = z3 + z4;
|
| ++ tmp11 = z3 - z4;
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
|
| ++ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
|
| ++
|
| ++ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
|
| ++ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
|
| ++ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
|
| ++
|
| ++ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
|
| ++ tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
|
| ++ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
|
| ++
|
| ++ tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
|
| ++ tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
|
| ++
|
| ++ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
|
| ++ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
|
| ++ tmp15 = z1 + z4; /* unscaled sum, consumed by the next line and again by the c11 multiply below */
|
| ++ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
|
| ++ tmp10 = tmp11 + tmp12 + tmp13 -
|
| ++ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
|
| ++ tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
|
| ++ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
|
| ++ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
|
| ++ tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
|
| ++ tmp11 += tmp14;
|
| ++ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
|
| ++ tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
|
| ++ tmp12 += tmp14;
|
| ++ tmp13 += tmp14;
|
| ++ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
|
| ++ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
|
| ++ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
|
| ++ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
|
| ++ tmp14 += z1;
|
| ++ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
|
| ++ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
|
| ++
|
| ++ /* Final output stage: workspace row k gets even+odd, row 12-k gets even-odd, row 6 gets tmp26 alone */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 13 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace; /* rewind to the first workspace row */
|
| ++ for (ctr = 0; ctr < 13; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ z1 <<= CONST_BITS; /* the rounding term becomes ONE << (CONST_BITS+PASS1_BITS+2), half of the final divisor */
|
| ++
|
| ++ z2 = (INT32) wsptr[2];
|
| ++ z3 = (INT32) wsptr[4];
|
| ++ z4 = (INT32) wsptr[6];
|
| ++
|
| ++ tmp10 = z3 + z4;
|
| ++ tmp11 = z3 - z4;
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
|
| ++ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
|
| ++
|
| ++ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
|
| ++ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
|
| ++ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
|
| ++
|
| ++ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
|
| ++ tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
|
| ++
|
| ++ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
|
| ++ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
|
| ++
|
| ++ tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
|
| ++ tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
|
| ++
|
| ++ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z4 = (INT32) wsptr[7];
|
| ++
|
| ++ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
|
| ++ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
|
| ++ tmp15 = z1 + z4;
|
| ++ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
|
| ++ tmp10 = tmp11 + tmp12 + tmp13 -
|
| ++ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
|
| ++ tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
|
| ++ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
|
| ++ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
|
| ++ tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
|
| ++ tmp11 += tmp14;
|
| ++ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
|
| ++ tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
|
| ++ tmp12 += tmp14;
|
| ++ tmp13 += tmp14;
|
| ++ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
|
| ++ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
|
| ++ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
|
| ++ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
|
| ++ tmp14 += z1;
|
| ++ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
|
| ++ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
|
| ++
|
| ++ /* Final output stage: sample k gets even+odd, sample 12-k gets even-odd, sample 6 gets tmp26 alone; each is descaled, masked with RANGE_MASK and looked up in range_limit */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 14x14 output block (rows output_buf[0..13], samples output_col..output_col+13 of each row).
|
| ++ * Pass 1 turns the 8 input columns into a 14-row by 8-wide int workspace; pass 2 turns each workspace row into 14 output samples.
|
| ++ * Optimized algorithm with 20 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/28).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; /* odd-part results at the output stage; tmp10..tmp15 double as even-part scratch */
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; /* even-part results */
|
| ++ INT32 z1, z2, z3, z4; /* input terms and short-lived scratch */
|
| ++ JCOEFPTR inptr; /* walks the columns of coef_block in pass 1 */
|
| ++ ISLOW_MULT_TYPE * quantptr; /* walks compptr->dct_table in step with inptr */
|
| ++ int * wsptr; /* cursor into workspace */
|
| ++ JSAMPROW outptr; /* current output row, already offset by output_col */
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table applied to every output sample */
|
| ++ int ctr; /* column counter in pass 1, row counter in pass 2 */
|
| ++ int workspace[8*14]; /* buffers data between passes: 14 rows with a stride of 8 ints */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration; all three cursors advance together */
|
| ++ /* Even part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); /* DEQUANTIZE is defined elsewhere; presumably coefficient times table entry - confirm */
|
| ++ z1 <<= CONST_BITS; /* NOTE(review): z1 may be negative; left-shifting a negative signed value is undefined in ISO C - confirm */
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z1 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the 2^(CONST_BITS-PASS1_BITS) divisor applied at the end of this pass */
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
|
| ++ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
|
| ++ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
|
| ++
|
| ++ tmp10 = z1 + z2;
|
| ++ tmp11 = z1 + z3;
|
| ++ tmp12 = z1 - z4;
|
| ++
|
| ++ tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
|
| ++ CONST_BITS-PASS1_BITS); /* descaled here already, so rows 3 and 10 are stored below without another shift */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
|
| ++
|
| ++ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
|
| ++ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
|
| ++ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
|
| ++ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
|
| ++
|
| ++ tmp20 = tmp10 + tmp13;
|
| ++ tmp26 = tmp10 - tmp13;
|
| ++ tmp21 = tmp11 + tmp14;
|
| ++ tmp25 = tmp11 - tmp14;
|
| ++ tmp22 = tmp12 + tmp15;
|
| ++ tmp24 = tmp12 - tmp15;
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++ tmp13 = z4 << CONST_BITS; /* shifted copy of z4; the unshifted z4 is still added to z1 below */
|
| ++
|
| ++ tmp14 = z1 + z3;
|
| ++ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
|
| ++ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
|
| ++ tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
|
| ++ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
|
| ++ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
|
| ++ z1 -= z2;
|
| ++ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
|
| ++ tmp16 += tmp15;
|
| ++ z1 += z4; /* z1 now holds (original z1) - z2 + z4; used only for tmp13 below */
|
| ++ z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
|
| ++ tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
|
| ++ tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
|
| ++ z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
|
| ++ tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
|
| ++ tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
|
| ++
|
| ++ tmp13 = (z1 - z3) << PASS1_BITS; /* scaled by PASS1_BITS only, matching the pre-descaled tmp23 */
|
| ++
|
| ++ /* Final output stage: workspace row k gets even+odd, row 13-k gets even-odd */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) (tmp23 + tmp13); /* no shift: both operands are already at workspace scale */
|
| ++ wsptr[8*10] = (int) (tmp23 - tmp13);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 14 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace; /* rewind to the first workspace row */
|
| ++ for (ctr = 0; ctr < 14; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ z1 <<= CONST_BITS; /* the rounding term becomes ONE << (CONST_BITS+PASS1_BITS+2), half of the final divisor */
|
| ++ z4 = (INT32) wsptr[4];
|
| ++ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
|
| ++ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
|
| ++ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
|
| ++
|
| ++ tmp10 = z1 + z2;
|
| ++ tmp11 = z1 + z3;
|
| ++ tmp12 = z1 - z4;
|
| ++
|
| ++ tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
|
| ++
|
| ++ z1 = (INT32) wsptr[2];
|
| ++ z2 = (INT32) wsptr[6];
|
| ++
|
| ++ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
|
| ++
|
| ++ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
|
| ++ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
|
| ++ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
|
| ++ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
|
| ++
|
| ++ tmp20 = tmp10 + tmp13;
|
| ++ tmp26 = tmp10 - tmp13;
|
| ++ tmp21 = tmp11 + tmp14;
|
| ++ tmp25 = tmp11 - tmp14;
|
| ++ tmp22 = tmp12 + tmp15;
|
| ++ tmp24 = tmp12 - tmp15;
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z4 = (INT32) wsptr[7];
|
| ++ z4 <<= CONST_BITS; /* unlike pass 1, z4 itself is shifted here and used in that form throughout */
|
| ++
|
| ++ tmp14 = z1 + z3;
|
| ++ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
|
| ++ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
|
| ++ tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
|
| ++ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
|
| ++ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
|
| ++ z1 -= z2;
|
| ++ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
|
| ++ tmp16 += tmp15;
|
| ++ tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
|
| ++ tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
|
| ++ tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
|
| ++ tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
|
| ++ tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
|
| ++ tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
|
| ++
|
| ++ tmp13 = ((z1 - z3) << CONST_BITS) + z4; /* z4 is already shifted, so it is added after the shift instead of into z1 */
|
| ++
|
| ++ /* Final output stage: sample k gets even+odd, sample 13-k gets even-odd; each is descaled, masked with RANGE_MASK and looked up in range_limit */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 15x15 output block.
|
| ++ *
|
| ++ * Optimized algorithm with 22 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/30).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
|
| ++ INT32 z1, z2, z3, z4;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[8*15]; /* buffers data between passes */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ z1 <<= CONST_BITS;
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z1 += ONE << (CONST_BITS-PASS1_BITS-1);
|
| ++
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++
|
| ++ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
|
| ++
|
| ++ tmp12 = z1 - tmp10;
|
| ++ tmp13 = z1 + tmp11;
|
| ++ z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
|
| ++
|
| ++ z4 = z2 - z3;
|
| ++ z3 += z2;
|
| ++ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
|
| ++ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
|
| ++
|
| ++ tmp20 = tmp13 + tmp10 + tmp11;
|
| ++ tmp23 = tmp12 - tmp10 + tmp11 + z2;
|
| ++
|
| ++ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
|
| ++
|
| ++ tmp25 = tmp13 - tmp10 - tmp11;
|
| ++ tmp26 = tmp12 + tmp10 - tmp11 - z2;
|
| ++
|
| ++ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
|
| ++
|
| ++ tmp21 = tmp12 + tmp10 + tmp11;
|
| ++ tmp24 = tmp13 - tmp10 + tmp11;
|
| ++ tmp11 += tmp11;
|
| ++ tmp22 = z1 + tmp11; /* c10 = c6-c12 */
|
| ++ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ tmp13 = z2 - z4;
|
| ++ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
|
| ++ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
|
| ++ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
|
| ++
|
| ++ tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
|
| ++ tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
|
| ++ z2 = z1 - z4;
|
| ++ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
|
| ++
|
| ++ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
|
| ++ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
|
| ++ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
|
| ++ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
|
| ++ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
|
| ++ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 15 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 15; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ z1 <<= CONST_BITS;
|
| ++
|
| ++ z2 = (INT32) wsptr[2];
|
| ++ z3 = (INT32) wsptr[4];
|
| ++ z4 = (INT32) wsptr[6];
|
| ++
|
| ++ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
|
| ++
|
| ++ tmp12 = z1 - tmp10;
|
| ++ tmp13 = z1 + tmp11;
|
| ++ z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
|
| ++
|
| ++ z4 = z2 - z3;
|
| ++ z3 += z2;
|
| ++ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
|
| ++ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
|
| ++
|
| ++ tmp20 = tmp13 + tmp10 + tmp11;
|
| ++ tmp23 = tmp12 - tmp10 + tmp11 + z2;
|
| ++
|
| ++ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
|
| ++
|
| ++ tmp25 = tmp13 - tmp10 - tmp11;
|
| ++ tmp26 = tmp12 + tmp10 - tmp11 - z2;
|
| ++
|
| ++ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
|
| ++ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
|
| ++
|
| ++ tmp21 = tmp12 + tmp10 + tmp11;
|
| ++ tmp24 = tmp13 - tmp10 + tmp11;
|
| ++ tmp11 += tmp11;
|
| ++ tmp22 = z1 + tmp11; /* c10 = c6-c12 */
|
| ++ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z4 = (INT32) wsptr[5];
|
| ++ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
|
| ++ z4 = (INT32) wsptr[7];
|
| ++
|
| ++ tmp13 = z2 - z4;
|
| ++ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
|
| ++ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
|
| ++ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
|
| ++
|
| ++ tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
|
| ++ tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
|
| ++ z2 = z1 - z4;
|
| ++ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
|
| ++
|
| ++ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
|
| ++ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
|
| ++ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
|
| ++ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
|
| ++ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
|
| ++ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| ++ }
|
| ++}
|
| ++
|
| ++
|
| ++/*
|
| ++ * Perform dequantization and inverse DCT on one block of coefficients,
|
| ++ * producing a 16x16 output block.
|
| ++ *
|
| ++ * Optimized algorithm with 28 multiplications in the 1-D kernel.
|
| ++ * cK represents sqrt(2) * cos(K*pi/32).
|
| ++ */
|
| ++
|
| ++GLOBAL(void)
|
| ++jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++{
|
| ++ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
|
| ++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
|
| ++ INT32 z1, z2, z3, z4;
|
| ++ JCOEFPTR inptr;
|
| ++ ISLOW_MULT_TYPE * quantptr;
|
| ++ int * wsptr;
|
| ++ JSAMPROW outptr;
|
| ++ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
| ++ int ctr;
|
| ++ int workspace[8*16]; /* buffers data between passes */
|
| ++ SHIFT_TEMPS
|
| ++
|
| ++ /* Pass 1: process columns from input, store into work array. */
|
| ++
|
| ++ inptr = coef_block;
|
| ++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
|
| ++ /* Even part */
|
| ++
|
| ++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
|
| ++ tmp0 <<= CONST_BITS;
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
|
| ++ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
|
| ++ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
|
| ++
|
| ++ tmp10 = tmp0 + tmp1;
|
| ++ tmp11 = tmp0 - tmp1;
|
| ++ tmp12 = tmp0 + tmp2;
|
| ++ tmp13 = tmp0 - tmp2;
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
|
| ++ z3 = z1 - z2;
|
| ++ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
|
| ++ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
|
| ++
|
| ++ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
|
| ++ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
|
| ++ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
|
| ++ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
|
| ++
|
| ++ tmp20 = tmp10 + tmp0;
|
| ++ tmp27 = tmp10 - tmp0;
|
| ++ tmp21 = tmp12 + tmp1;
|
| ++ tmp26 = tmp12 - tmp1;
|
| ++ tmp22 = tmp13 + tmp2;
|
| ++ tmp25 = tmp13 - tmp2;
|
| ++ tmp23 = tmp11 + tmp3;
|
| ++ tmp24 = tmp11 - tmp3;
|
| ++
|
| ++ /* Odd part */
|
| ++
|
| ++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
|
| ++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
|
| ++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
|
| ++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
|
| ++
|
| ++ tmp11 = z1 + z3;
|
| ++
|
| ++ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
|
| ++ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
|
| ++ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
|
| ++ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
|
| ++ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
|
| ++ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
|
| ++ tmp0 = tmp1 + tmp2 + tmp3 -
|
| ++ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
|
| ++ tmp13 = tmp10 + tmp11 + tmp12 -
|
| ++ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
|
| ++ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
|
| ++ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
|
| ++ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
|
| ++ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
|
| ++ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
|
| ++ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
|
| ++ z2 += z4;
|
| ++ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
|
| ++ tmp1 += z1;
|
| ++ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
|
| ++ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
|
| ++ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
|
| ++ tmp12 += z2;
|
| ++ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
|
| ++ tmp2 += z2;
|
| ++ tmp3 += z2;
|
| ++ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
|
| ++ tmp10 += z2;
|
| ++ tmp11 += z2;
|
| ++
|
| ++ /* Final output stage */
|
| ++
|
| ++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
|
| ++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
|
| ++ }
|
| ++
|
| ++ /* Pass 2: process 16 rows from work array, store into output array. */
|
| ++
|
| ++ wsptr = workspace;
|
| ++ for (ctr = 0; ctr < 16; ctr++) {
|
| ++ outptr = output_buf[ctr] + output_col;
|
| ++
|
| ++ /* Even part */
|
| ++
|
| ++ /* Add fudge factor here for final descale. */
|
| ++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
|
| ++ tmp0 <<= CONST_BITS;
|
| ++
|
| ++ z1 = (INT32) wsptr[4];
|
| ++ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
|
| ++ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
|
| +
|
| -+/* DC table 1 */
|
| -+LOCAL(const unsigned char) mjpg_dc1_bits[] = {
|
| -+ 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
|
| -+ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00
|
| -+};
|
| ++ tmp10 = tmp0 + tmp1;
|
| ++ tmp11 = tmp0 - tmp1;
|
| ++ tmp12 = tmp0 + tmp2;
|
| ++ tmp13 = tmp0 - tmp2;
|
| +
|
| -+LOCAL(const unsigned char) mjpg_dc1_huffval[] = {
|
| -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
| -+ 0x08, 0x09, 0x0A, 0x0B
|
| -+};
|
| -+
|
| -+/* AC table 0 */
|
| -+LOCAL(const unsigned char) mjpg_ac0_bits[] = {
|
| -+ 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03,
|
| -+ 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D
|
| -+};
|
| ++ z1 = (INT32) wsptr[2];
|
| ++ z2 = (INT32) wsptr[6];
|
| ++ z3 = z1 - z2;
|
| ++ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
|
| ++ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
|
| +
|
| -+LOCAL(const unsigned char) mjpg_ac0_huffval[] = {
|
| -+ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
|
| -+ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
|
| -+ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08,
|
| -+ 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0,
|
| -+ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16,
|
| -+ 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28,
|
| -+ 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
| -+ 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
|
| -+ 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
| -+ 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
|
| -+ 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
|
| -+ 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
|
| -+ 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
|
| -+ 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
|
| -+ 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
|
| -+ 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
|
| -+ 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4,
|
| -+ 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2,
|
| -+ 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA,
|
| -+ 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
|
| -+ 0xF9, 0xFA
|
| -+};
|
| ++ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
|
| ++ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
|
| ++ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
|
| ++ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
|
| +
|
| -+/* AC table 1 */
|
| -+LOCAL(const unsigned char) mjpg_ac1_bits[] = {
|
| -+ 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04,
|
| -+ 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77
|
| -+};
|
| ++ tmp20 = tmp10 + tmp0;
|
| ++ tmp27 = tmp10 - tmp0;
|
| ++ tmp21 = tmp12 + tmp1;
|
| ++ tmp26 = tmp12 - tmp1;
|
| ++ tmp22 = tmp13 + tmp2;
|
| ++ tmp25 = tmp13 - tmp2;
|
| ++ tmp23 = tmp11 + tmp3;
|
| ++ tmp24 = tmp11 - tmp3;
|
| +
|
| -+LOCAL(const unsigned char) mjpg_ac1_huffval[] = {
|
| -+ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
|
| -+ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
|
| -+ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
|
| -+ 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0,
|
| -+ 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34,
|
| -+ 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26,
|
| -+ 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38,
|
| -+ 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
| -+ 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
|
| -+ 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
|
| -+ 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
|
| -+ 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
|
| -+ 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96,
|
| -+ 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
|
| -+ 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4,
|
| -+ 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
|
| -+ 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2,
|
| -+ 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA,
|
| -+ 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9,
|
| -+ 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
|
| -+ 0xF9, 0xFA
|
| -+};
|
| ++ /* Odd part */
|
| +
|
| -+/* Loads the default Huffman tables used by motion JPEG frames. This function
|
| -+ * just copies the huffman tables suggested in the JPEG standard when we have
|
| -+ * not load them.
|
| -+ */
|
| -+LOCAL(void)
|
| -+mjpg_load_huff_tables (j_decompress_ptr cinfo)
|
| -+{
|
| -+ JHUFF_TBL *htblptr;
|
| ++ z1 = (INT32) wsptr[1];
|
| ++ z2 = (INT32) wsptr[3];
|
| ++ z3 = (INT32) wsptr[5];
|
| ++ z4 = (INT32) wsptr[7];
|
| +
|
| -+ if (! cinfo->dc_huff_tbl_ptrs[0]) {
|
| -+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| -+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| -+ MEMCOPY(&htblptr->bits[1], mjpg_dc0_bits, SIZEOF(mjpg_dc0_bits));
|
| -+ MEMCOPY(&htblptr->huffval[0], mjpg_dc0_huffval, SIZEOF(mjpg_dc0_huffval));
|
| -+ cinfo->dc_huff_tbl_ptrs[0] = htblptr;
|
| -+ }
|
| ++ tmp11 = z1 + z3;
|
| +
|
| -+ if (! cinfo->dc_huff_tbl_ptrs[1]) {
|
| -+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| -+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| -+ MEMCOPY(&htblptr->bits[1], mjpg_dc1_bits, SIZEOF(mjpg_dc1_bits));
|
| -+ MEMCOPY(&htblptr->huffval[0], mjpg_dc1_huffval, SIZEOF(mjpg_dc1_huffval));
|
| -+ cinfo->dc_huff_tbl_ptrs[1] = htblptr;
|
| -+ }
|
| ++ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
|
| ++ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
|
| ++ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
|
| ++ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
|
| ++ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
|
| ++ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
|
| ++ tmp0 = tmp1 + tmp2 + tmp3 -
|
| ++ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
|
| ++ tmp13 = tmp10 + tmp11 + tmp12 -
|
| ++ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
|
| ++ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
|
| ++ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
|
| ++ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
|
| ++ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
|
| ++ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
|
| ++ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
|
| ++ z2 += z4;
|
| ++ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
|
| ++ tmp1 += z1;
|
| ++ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
|
| ++ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
|
| ++ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
|
| ++ tmp12 += z2;
|
| ++ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
|
| ++ tmp2 += z2;
|
| ++ tmp3 += z2;
|
| ++ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
|
| ++ tmp10 += z2;
|
| ++ tmp11 += z2;
|
| +
|
| -+ if (! cinfo->ac_huff_tbl_ptrs[0]) {
|
| -+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| -+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| -+ MEMCOPY(&htblptr->bits[1], mjpg_ac0_bits, SIZEOF(mjpg_ac0_bits));
|
| -+ MEMCOPY(&htblptr->huffval[0], mjpg_ac0_huffval, SIZEOF(mjpg_ac0_huffval));
|
| -+ cinfo->ac_huff_tbl_ptrs[0] = htblptr;
|
| -+ }
|
| ++ /* Final output stage */
|
| +
|
| -+ if (! cinfo->ac_huff_tbl_ptrs[1]) {
|
| -+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
|
| -+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL));
|
| -+ MEMCOPY(&htblptr->bits[1], mjpg_ac1_bits, SIZEOF(mjpg_ac1_bits));
|
| -+ MEMCOPY(&htblptr->huffval[0], mjpg_ac1_huffval, SIZEOF(mjpg_ac1_huffval));
|
| -+ cinfo->ac_huff_tbl_ptrs[1] = htblptr;
|
| ++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
|
| ++ CONST_BITS+PASS1_BITS+3)
|
| ++ & RANGE_MASK];
|
| ++
|
| ++ wsptr += 8; /* advance pointer to next row */
|
| + }
|
| +}
|
| +
|
| -+#else
|
| -+
|
| -+#define mjpg_load_huff_tables(cinfo)
|
| -+
|
| -+#endif /* MOTION_JPEG_SUPPORTED */
|
| ++#endif /* IDCT_SCALING_SUPPORTED */
|
| + #endif /* DCT_ISLOW_SUPPORTED */
|
| +Index: jmemmgr.c
|
| +===================================================================
|
| +--- jmemmgr.c (revision 829)
|
| ++++ jmemmgr.c (working copy)
|
| +@@ -37,6 +37,15 @@
|
| + #endif
|
| +
|
| +
|
| ++LOCAL(size_t)
|
| ++round_up_pow2 (size_t a, size_t b)
|
| ++/* a rounded up to the next multiple of b, i.e. ceil(a/b)*b */
|
| ++/* Assumes a >= 0, b > 0, and b is a power of 2 */
|
| ++{
|
| ++ return ((a + b - 1) & (~(b - 1)));
|
| ++}
|
| +
|
| +
|
| /*
|
| - * Read markers until SOS or EOI.
|
| - *
|
| -@@ -1013,6 +1150,7 @@
|
| - break;
|
| + * Some important notes:
|
| + * The allocation routines provided here must never return NULL.
|
| +@@ -122,7 +131,7 @@
|
| + jvirt_barray_ptr virt_barray_list;
|
| +
|
| + /* This counts total space obtained from jpeg_get_small/large */
|
| +- long total_space_allocated;
|
| ++ size_t total_space_allocated;
|
| +
|
| + /* alloc_sarray and alloc_barray set this value for use by virtual
|
| + * array routines.
|
| +@@ -265,7 +274,7 @@
|
| + * and so that algorithms can straddle outside the proper area up
|
| + * to the next alignment.
|
| + */
|
| +- sizeofobject = jround_up(sizeofobject, ALIGN_SIZE);
|
| ++ sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);
|
| +
|
| + /* Check for unsatisfiable request (do now to ensure no overflow below) */
|
| + if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
|
| +@@ -317,8 +326,8 @@
|
| + /* OK, allocate the object from the current pool */
|
| + data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */
|
| + data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */
|
| +- if ((unsigned long)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
|
| +- data_ptr += ALIGN_SIZE - (unsigned long)data_ptr % ALIGN_SIZE;
|
| ++ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
|
| ++ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
|
| + data_ptr += hdr_ptr->bytes_used; /* point to place for object */
|
| + hdr_ptr->bytes_used += sizeofobject;
|
| + hdr_ptr->bytes_left -= sizeofobject;
|
| +@@ -354,7 +363,7 @@
|
| + * algorithms can straddle outside the proper area up to the next
|
| + * alignment.
|
| + */
|
| +- sizeofobject = jround_up(sizeofobject, ALIGN_SIZE);
|
| ++ sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);
|
|
|
| - case M_SOS:
|
| -+ mjpg_load_huff_tables(cinfo);
|
| - if (! get_sos(cinfo))
|
| - return JPEG_SUSPENDED;
|
| - cinfo->unread_marker = 0; /* processed the marker */
|
| + /* Check for unsatisfiable request (do now to ensure no overflow below) */
|
| + if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
|
| +@@ -382,8 +391,8 @@
|
| +
|
| + data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */
|
| + data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */
|
| +- if ((unsigned long)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
|
| +- data_ptr += ALIGN_SIZE - (unsigned long)data_ptr % ALIGN_SIZE;
|
| ++ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
|
| ++ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
|
| +
|
| + return (void FAR *) data_ptr;
|
| + }
|
| +@@ -420,7 +429,7 @@
|
| + /* Make sure each row is properly aligned */
|
| + if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0)
|
| + out_of_memory(cinfo, 5); /* safety check */
|
| +- samplesperrow = jround_up(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE));
|
| ++ samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE));
|
| +
|
| + /* Calculate max # of rows allowed in one allocation chunk */
|
| + ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
|
| +@@ -608,8 +617,8 @@
|
| + /* Allocate the in-memory buffers for any unrealized virtual arrays */
|
| + {
|
| + my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
|
| +- long space_per_minheight, maximum_space, avail_mem;
|
| +- long minheights, max_minheights;
|
| ++ size_t space_per_minheight, maximum_space, avail_mem;
|
| ++ size_t minheights, max_minheights;
|
| + jvirt_sarray_ptr sptr;
|
| + jvirt_barray_ptr bptr;
|
| +
|
| +Index: jmemnobs.c
|
| +===================================================================
|
| +--- jmemnobs.c (revision 829)
|
| ++++ jmemnobs.c (working copy)
|
| +@@ -69,9 +69,9 @@
|
| + * Here we always say, "we got all you want bud!"
|
| + */
|
| +
|
| +-GLOBAL(long)
|
| +-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
|
| +- long max_bytes_needed, long already_allocated)
|
| ++GLOBAL(size_t)
|
| ++jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed,
|
| ++ size_t max_bytes_needed, size_t already_allocated)
|
| + {
|
| + return max_bytes_needed;
|
| + }
|
| +Index: jmemsys.h
|
| +===================================================================
|
| +--- jmemsys.h (revision 829)
|
| ++++ jmemsys.h (working copy)
|
| +@@ -100,10 +100,10 @@
|
| + * Conversely, zero may be returned to always use the minimum amount of memory.
|
| + */
|
| +
|
| +-EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
|
| +- long min_bytes_needed,
|
| +- long max_bytes_needed,
|
| +- long already_allocated));
|
| ++EXTERN(size_t) jpeg_mem_available JPP((j_common_ptr cinfo,
|
| ++ size_t min_bytes_needed,
|
| ++ size_t max_bytes_needed,
|
| ++ size_t already_allocated));
|
| +
|
| +
|
| + /*
|
| Index: jmorecfg.h
|
| ===================================================================
|
| --- jmorecfg.h (revision 829)
|
| +++ jmorecfg.h (working copy)
|
| -@@ -153,14 +153,18 @@
|
| +@@ -1,9 +1,10 @@
|
| + /*
|
| + * jmorecfg.h
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| +- * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modifications:
|
| ++ * Copyright (C) 2009, 2011, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains additional configuration options that customize the
|
| +@@ -153,14 +154,18 @@
|
| /* INT16 must hold at least the values -32768..32767. */
|
|
|
| #ifndef XMD_H /* X11/xmd.h correctly defines INT16 */
|
| @@ -184,43 +9623,536 @@ Index: jmorecfg.h
|
| typedef long INT32;
|
| #endif
|
| +#endif
|
| -
|
| - /* Datatype used for image dimensions. The JPEG standard only supports
|
| - * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore
|
| -@@ -210,11 +214,13 @@
|
| - * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
|
| - */
|
| -
|
| -+#ifndef FAR
|
| - #ifdef NEED_FAR_POINTERS
|
| - #define FAR far
|
| - #else
|
| - #define FAR
|
| - #endif
|
| +
|
| + /* Datatype used for image dimensions. The JPEG standard only supports
|
| + * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore
|
| +@@ -210,11 +215,16 @@
|
| + * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
|
| + */
|
| +
|
| ++#ifndef FAR
|
| + #ifdef NEED_FAR_POINTERS
|
| ++#ifndef FAR
|
| + #define FAR far
|
| ++#endif
|
| + #else
|
| ++#undef FAR
|
| + #define FAR
|
| + #endif
|
| ++#endif
|
| +
|
| +
|
| + /*
|
| +@@ -257,8 +267,6 @@
|
| + * (You may HAVE to do that if your compiler doesn't like null source files.)
|
| + */
|
| +
|
| +-/* Arithmetic coding is unsupported for legal reasons. Complaints to IBM. */
|
| +-
|
| + /* Capability options common to encoder and decoder: */
|
| +
|
| + #define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */
|
| +@@ -267,7 +275,6 @@
|
| +
|
| + /* Encoder capability options: */
|
| +
|
| +-#undef C_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */
|
| + #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
|
| + #define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
|
| + #define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */
|
| +@@ -283,7 +290,6 @@
|
| +
|
| + /* Decoder capability options: */
|
| +
|
| +-#undef D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */
|
| + #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
|
| + #define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
|
| + #define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */
|
| +@@ -317,22 +323,60 @@
|
| + #define RGB_BLUE 2 /* Offset of Blue */
|
| + #define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */
|
| +
|
| +-#define JPEG_NUMCS 12
|
| ++#define JPEG_NUMCS 16
|
| +
|
| ++#define EXT_RGB_RED 0
|
| ++#define EXT_RGB_GREEN 1
|
| ++#define EXT_RGB_BLUE 2
|
| ++#define EXT_RGB_PIXELSIZE 3
|
| ++
|
| ++#define EXT_RGBX_RED 0
|
| ++#define EXT_RGBX_GREEN 1
|
| ++#define EXT_RGBX_BLUE 2
|
| ++#define EXT_RGBX_PIXELSIZE 4
|
| ++
|
| ++#define EXT_BGR_RED 2
|
| ++#define EXT_BGR_GREEN 1
|
| ++#define EXT_BGR_BLUE 0
|
| ++#define EXT_BGR_PIXELSIZE 3
|
| ++
|
| ++#define EXT_BGRX_RED 2
|
| ++#define EXT_BGRX_GREEN 1
|
| ++#define EXT_BGRX_BLUE 0
|
| ++#define EXT_BGRX_PIXELSIZE 4
|
| ++
|
| ++#define EXT_XBGR_RED 3
|
| ++#define EXT_XBGR_GREEN 2
|
| ++#define EXT_XBGR_BLUE 1
|
| ++#define EXT_XBGR_PIXELSIZE 4
|
| ++
|
| ++#define EXT_XRGB_RED 1
|
| ++#define EXT_XRGB_GREEN 2
|
| ++#define EXT_XRGB_BLUE 3
|
| ++#define EXT_XRGB_PIXELSIZE 4
|
| ++
|
| + static const int rgb_red[JPEG_NUMCS] = {
|
| +- -1, -1, RGB_RED, -1, -1, -1, 0, 0, 2, 2, 3, 1
|
| ++ -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED,
|
| ++ EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
|
| ++ EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED
|
| + };
|
| +
|
| + static const int rgb_green[JPEG_NUMCS] = {
|
| +- -1, -1, RGB_GREEN, -1, -1, -1, 1, 1, 1, 1, 2, 2
|
| ++ -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN,
|
| ++ EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
|
| ++ EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN
|
| + };
|
| +
|
| + static const int rgb_blue[JPEG_NUMCS] = {
|
| +- -1, -1, RGB_BLUE, -1, -1, -1, 2, 2, 0, 0, 1, 3
|
| ++ -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE,
|
| ++ EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
|
| ++ EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE
|
| + };
|
| +
|
| + static const int rgb_pixelsize[JPEG_NUMCS] = {
|
| +- -1, -1, RGB_PIXELSIZE, -1, -1, -1, 3, 4, 3, 4, 4, 4
|
| ++ -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE,
|
| ++ EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
|
| ++ EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE
|
| + };
|
| +
|
| + /* Definitions for speed-related optimizations. */
|
| +Index: jpegint.h
|
| +===================================================================
|
| +--- jpegint.h (revision 829)
|
| ++++ jpegint.h (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * jpegint.h
|
| + *
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| ++ * Modified 1997-2009 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -304,6 +305,7 @@
|
| + #define jinit_forward_dct jIFDCT
|
| + #define jinit_huff_encoder jIHEncoder
|
| + #define jinit_phuff_encoder jIPHEncoder
|
| ++#define jinit_arith_encoder jIAEncoder
|
| + #define jinit_marker_writer jIMWriter
|
| + #define jinit_master_decompress jIDMaster
|
| + #define jinit_d_main_controller jIDMainC
|
| +@@ -313,6 +315,7 @@
|
| + #define jinit_marker_reader jIMReader
|
| + #define jinit_huff_decoder jIHDecoder
|
| + #define jinit_phuff_decoder jIPHDecoder
|
| ++#define jinit_arith_decoder jIADecoder
|
| + #define jinit_inverse_dct jIIDCT
|
| + #define jinit_upsampler jIUpsampler
|
| + #define jinit_color_deconverter jIDColor
|
| +@@ -327,6 +330,7 @@
|
| + #define jzero_far jZeroFar
|
| + #define jpeg_zigzag_order jZIGTable
|
| + #define jpeg_natural_order jZAGTable
|
| ++#define jpeg_aritab jAriTab
|
| + #endif /* NEED_SHORT_EXTERNAL_NAMES */
|
| +
|
| +
|
| +@@ -345,6 +349,7 @@
|
| + EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
|
| + EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
|
| + EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
|
| ++EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
|
| + EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
|
| + /* Decompression module initialization routines */
|
| + EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
|
| +@@ -358,6 +363,7 @@
|
| + EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
|
| + EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
|
| + EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
|
| ++EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
|
| + EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
|
| + EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
|
| + EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
|
| +@@ -382,6 +388,9 @@
|
| + #endif
|
| + extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
|
| +
|
| ++/* Arithmetic coding probability estimation tables in jaricom.c */
|
| ++extern const INT32 jpeg_aritab[];
|
| ++
|
| + /* Suppress undefined-structure complaints if necessary. */
|
| +
|
| + #ifdef INCOMPLETE_TYPES_BROKEN
|
| +Index: jpeglib.h
|
| +===================================================================
|
| +--- jpeglib.h (revision 829)
|
| ++++ jpeglib.h (working copy)
|
| +@@ -1,9 +1,12 @@
|
| + /*
|
| + * jpeglib.h
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1998, Thomas G. Lane.
|
| +- * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2002-2009 by Guido Vollbeding.
|
| ++ * Modifications:
|
| ++ * Copyright (C) 2009-2011, 2013, D. R. Commander.
|
| ++ * Copyright (C) 2015, Google, Inc.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file defines the application interface for the JPEG library.
|
| +@@ -14,6 +17,10 @@
|
| + #ifndef JPEGLIB_H
|
| + #define JPEGLIB_H
|
| +
|
| ++/* Begin chromium edits */
|
| ++#include "jpeglibmangler.h"
|
| ++/* End chromium edits */
|
| ++
|
| + /*
|
| + * First we include the configuration files that record how this
|
| + * installation of the JPEG library is set up. jconfig.h can be
|
| +@@ -27,13 +34,13 @@
|
| + #include "jmorecfg.h" /* seldom changed options */
|
| +
|
| +
|
| +-/* Version ID for the JPEG library.
|
| +- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
|
| +- */
|
| ++#ifdef __cplusplus
|
| ++#ifndef DONT_USE_EXTERN_C
|
| ++extern "C" {
|
| ++#endif
|
| ++#endif
|
| +
|
| +-#define JPEG_LIB_VERSION 62 /* Version 6b */
|
| +
|
| +-
|
| + /* Various constants determining the sizes of things.
|
| + * All of these are specified by the JPEG standard, so don't change them
|
| + * if you want to be compatible.
|
| +@@ -145,12 +152,17 @@
|
| + * Values of 1,2,4,8 are likely to be supported. Note that different
|
| + * components may receive different IDCT scalings.
|
| + */
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ int DCT_h_scaled_size;
|
| ++ int DCT_v_scaled_size;
|
| ++#else
|
| + int DCT_scaled_size;
|
| ++#endif
|
| + /* The downsampled dimensions are the component's actual, unpadded number
|
| + * of samples at the main buffer (preprocessing/compression interface), thus
|
| + * downsampled_width = ceil(image_width * Hi/Hmax)
|
| + * and similarly for height. For decompression, IDCT scaling is included, so
|
| +- * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE)
|
| ++ * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
|
| + */
|
| + JDIMENSION downsampled_width; /* actual width in samples */
|
| + JDIMENSION downsampled_height; /* actual height in samples */
|
| +@@ -165,7 +177,7 @@
|
| + int MCU_width; /* number of blocks per MCU, horizontally */
|
| + int MCU_height; /* number of blocks per MCU, vertically */
|
| + int MCU_blocks; /* MCU_width * MCU_height */
|
| +- int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_scaled_size */
|
| ++ int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
|
| + int last_col_width; /* # of non-dummy blocks across in last MCU */
|
| + int last_row_height; /* # of non-dummy blocks down in last MCU */
|
| +
|
| +@@ -205,12 +217,13 @@
|
| + /* Known color spaces. */
|
| +
|
| + #define JCS_EXTENSIONS 1
|
| ++#define JCS_ALPHA_EXTENSIONS 1
|
| +
|
| + typedef enum {
|
| + JCS_UNKNOWN, /* error/unspecified */
|
| + JCS_GRAYSCALE, /* monochrome */
|
| + JCS_RGB, /* red/green/blue as specified by the RGB_RED, RGB_GREEN,
|
| +- RGB_BLUE, and RGB_PIXELSIZE macros */
|
| ++ RGB_BLUE, and RGB_PIXELSIZE macros */
|
| + JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */
|
| + JCS_CMYK, /* C/M/Y/K */
|
| + JCS_YCCK, /* Y/Cb/Cr/K */
|
| +@@ -220,6 +233,17 @@
|
| + JCS_EXT_BGRX, /* blue/green/red/x */
|
| + JCS_EXT_XBGR, /* x/blue/green/red */
|
| + JCS_EXT_XRGB, /* x/red/green/blue */
|
| ++ /* When out_color_space is set to JCS_EXT_RGBX, JCS_EXT_BGRX,
|
| ++ JCS_EXT_XBGR, or JCS_EXT_XRGB during decompression, the X byte is
|
| ++ undefined, and in order to ensure the best performance,
|
| ++ libjpeg-turbo can set that byte to whatever value it wishes. Use
|
| ++ the following colorspace constants to ensure that the X byte is set
|
| ++ to 0xFF, so that it can be interpreted as an opaque alpha
|
| ++ channel. */
|
| ++ JCS_EXT_RGBA, /* red/green/blue/alpha */
|
| ++ JCS_EXT_BGRA, /* blue/green/red/alpha */
|
| ++ JCS_EXT_ABGR, /* alpha/blue/green/red */
|
| ++ JCS_EXT_ARGB /* alpha/red/green/blue */
|
| + } J_COLOR_SPACE;
|
| +
|
| + /* DCT/IDCT algorithm options. */
|
| +@@ -301,6 +325,19 @@
|
| + * helper routines to simplify changing parameters.
|
| + */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ unsigned int scale_num, scale_denom; /* fraction by which to scale image */
|
| ++
|
| ++ JDIMENSION jpeg_width; /* scaled JPEG image width */
|
| ++ JDIMENSION jpeg_height; /* scaled JPEG image height */
|
| ++ /* Dimensions of actual JPEG image that will be written to file,
|
| ++ * derived from input dimensions by scaling factors above.
|
| ++ * These fields are computed by jpeg_start_compress().
|
| ++ * You can also use jpeg_calc_jpeg_dimensions() to determine these values
|
| ++ * in advance of calling jpeg_start_compress().
|
| ++ */
|
| ++#endif
|
| ++
|
| + int data_precision; /* bits of precision in image data */
|
| +
|
| + int num_components; /* # of color components in JPEG image */
|
| +@@ -308,14 +345,19 @@
|
| +
|
| + jpeg_component_info * comp_info;
|
| + /* comp_info[i] describes component that appears i'th in SOF */
|
| +-
|
| ++
|
| + JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
|
| +- /* ptrs to coefficient quantization tables, or NULL if not defined */
|
| +-
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ int q_scale_factor[NUM_QUANT_TBLS];
|
| ++#endif
|
| ++ /* ptrs to coefficient quantization tables, or NULL if not defined,
|
| ++ * and corresponding scale factors (percentage, initialized 100).
|
| ++ */
|
| ++
|
| + JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
|
| + JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
|
| + /* ptrs to Huffman coding tables, or NULL if not defined */
|
| +-
|
| ++
|
| + UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
|
| + UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
|
| + UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
|
| +@@ -331,6 +373,9 @@
|
| + boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */
|
| + boolean optimize_coding; /* TRUE=optimize entropy encoding parms */
|
| + boolean CCIR601_sampling; /* TRUE=first samples are cosited */
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
|
| ++#endif
|
| + int smoothing_factor; /* 1..100, or 0 for no input smoothing */
|
| + J_DCT_METHOD dct_method; /* DCT algorithm selector */
|
| +
|
| +@@ -374,6 +419,11 @@
|
| + int max_h_samp_factor; /* largest h_samp_factor */
|
| + int max_v_samp_factor; /* largest v_samp_factor */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
|
| ++ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
|
| ++#endif
|
| ++
|
| + JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */
|
| + /* The coefficient controller receives data in units of MCU rows as defined
|
| + * for fully interleaved scans (whether the JPEG file is interleaved or not).
|
| +@@ -399,6 +449,12 @@
|
| +
|
| + int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++ int block_size; /* the basic DCT block size: 1..16 */
|
| ++ const int * natural_order; /* natural-order position array */
|
| ++ int lim_Se; /* min( Se, DCTSIZE2-1 ) */
|
| ++#endif
|
| ++
|
| + /*
|
| + * Links to compression subobjects (methods and private variables of modules)
|
| + */
|
| +@@ -545,6 +601,9 @@
|
| + jpeg_component_info * comp_info;
|
| + /* comp_info[i] describes component that appears i'th in SOF */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++ boolean is_baseline; /* TRUE if Baseline SOF0 encountered */
|
| ++#endif
|
| + boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */
|
| + boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */
|
| +
|
| +@@ -585,7 +644,12 @@
|
| + int max_h_samp_factor; /* largest h_samp_factor */
|
| + int max_v_samp_factor; /* largest v_samp_factor */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
|
| ++ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
|
| ++#else
|
| + int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */
|
| ++#endif
|
| +
|
| + JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */
|
| + /* The coefficient controller's input and output progress is measured in
|
| +@@ -593,7 +657,7 @@
|
| + * in fully interleaved JPEG scans, but are used whether the scan is
|
| + * interleaved or not. We define an iMCU row as v_samp_factor DCT block
|
| + * rows of each component. Therefore, the IDCT output contains
|
| +- * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row.
|
| ++ * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row.
|
| + */
|
| +
|
| + JSAMPLE * sample_range_limit; /* table for fast range-limiting */
|
| +@@ -617,6 +681,14 @@
|
| +
|
| + int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++ /* These fields are derived from Se of first SOS marker.
|
| ++ */
|
| ++ int block_size; /* the basic DCT block size: 1..16 */
|
| ++ const int * natural_order; /* natural-order position array for entropy decode */
|
| ++ int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */
|
| ++#endif
|
| ++
|
| + /* This field is shared between entropy decoder and marker parser.
|
| + * It is either zero or the code of a JPEG marker that has been
|
| + * read from the data source, but has not yet been processed.
|
| +@@ -846,11 +918,18 @@
|
| + #define jpeg_destroy_decompress jDestDecompress
|
| + #define jpeg_stdio_dest jStdDest
|
| + #define jpeg_stdio_src jStdSrc
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++#define jpeg_mem_dest jMemDest
|
| ++#define jpeg_mem_src jMemSrc
|
| ++#endif
|
| + #define jpeg_set_defaults jSetDefaults
|
| + #define jpeg_set_colorspace jSetColorspace
|
| + #define jpeg_default_colorspace jDefColorspace
|
| + #define jpeg_set_quality jSetQuality
|
| + #define jpeg_set_linear_quality jSetLQuality
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++#define jpeg_default_qtables jDefQTables
|
| ++#endif
|
| + #define jpeg_add_quant_table jAddQuantTable
|
| + #define jpeg_quality_scaling jQualityScaling
|
| + #define jpeg_simple_progression jSimProgress
|
| +@@ -860,6 +939,9 @@
|
| + #define jpeg_start_compress jStrtCompress
|
| + #define jpeg_write_scanlines jWrtScanlines
|
| + #define jpeg_finish_compress jFinCompress
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++#define jpeg_calc_jpeg_dimensions jCjpegDimensions
|
| ++#endif
|
| + #define jpeg_write_raw_data jWrtRawData
|
| + #define jpeg_write_marker jWrtMarker
|
| + #define jpeg_write_m_header jWrtMHeader
|
| +@@ -876,6 +958,9 @@
|
| + #define jpeg_input_complete jInComplete
|
| + #define jpeg_new_colormap jNewCMap
|
| + #define jpeg_consume_input jConsumeInput
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++#define jpeg_core_output_dimensions jCoreDimensions
|
| ++#endif
|
| + #define jpeg_calc_output_dimensions jCalcDimensions
|
| + #define jpeg_save_markers jSaveMarkers
|
| + #define jpeg_set_marker_processor jSetMarker
|
| +@@ -920,6 +1005,16 @@
|
| + EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
|
| + EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
|
| +
|
| ++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
| ++/* Data source and destination managers: memory buffers. */
|
| ++EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
|
| ++ unsigned char ** outbuffer,
|
| ++ unsigned long * outsize));
|
| ++EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
|
| ++ unsigned char * inbuffer,
|
| ++ unsigned long insize));
|
| ++#endif
|
| ++
|
| + /* Default parameter setup for compression */
|
| + EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
|
| + /* Compression parameter setup aids */
|
| +@@ -931,6 +1026,10 @@
|
| + EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
|
| + int scale_factor,
|
| + boolean force_baseline));
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
|
| ++ boolean force_baseline));
|
| ++#endif
|
| + EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
|
| + const unsigned int *basic_table,
|
| + int scale_factor,
|
| +@@ -950,12 +1049,17 @@
|
| + JDIMENSION num_lines));
|
| + EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
|
| +
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++/* Precalculate JPEG dimensions for current compression parameters. */
|
| ++EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
|
| ++#endif
|
| ++
|
| + /* Replaces jpeg_write_scanlines when writing raw downsampled data. */
|
| + EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
|
| + JSAMPIMAGE data,
|
| + JDIMENSION num_lines));
|
| +
|
| +-/* Write a special marker. See libjpeg.doc concerning safe usage. */
|
| ++/* Write a special marker. See libjpeg.txt concerning safe usage. */
|
| + EXTERN(void) jpeg_write_marker
|
| + JPP((j_compress_ptr cinfo, int marker,
|
| + const JOCTET * dataptr, unsigned int datalen));
|
| +@@ -986,6 +1090,8 @@
|
| + EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
|
| + JSAMPARRAY scanlines,
|
| + JDIMENSION max_lines));
|
| ++EXTERN(JDIMENSION) jpeg_skip_scanlines (j_decompress_ptr cinfo,
|
| ++ JDIMENSION num_lines);
|
| + EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
|
| +
|
| + /* Replaces jpeg_read_scanlines when reading raw downsampled data. */
|
| +@@ -1009,6 +1115,9 @@
|
| + #define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */
|
| +
|
| + /* Precalculate output dimensions for current decompression parameters. */
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
|
| +#endif
|
| + EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
|
|
|
| + /* Control saving of COM and APPn markers into marker_list. */
|
| +@@ -1103,4 +1212,10 @@
|
| + #include "jerror.h" /* fetch error codes too */
|
| + #endif
|
|
|
| - /*
|
| -Index: jpeglib.h
|
| -===================================================================
|
| ---- jpeglib.h (revision 829)
|
| -+++ jpeglib.h (working copy)
|
| -@@ -15,6 +15,10 @@
|
| - #ifndef JPEGLIB_H
|
| - #define JPEGLIB_H
|
| -
|
| -+/* Begin chromium edits */
|
| -+#include "jpeglibmangler.h"
|
| -+/* End chromium edits */
|
| ++#ifdef __cplusplus
|
| ++#ifndef DONT_USE_EXTERN_C
|
| ++}
|
| ++#endif
|
| ++#endif
|
| +
|
| - /*
|
| - * First we include the configuration files that record how this
|
| - * installation of the JPEG library is set up. jconfig.h can be
|
| + #endif /* JPEGLIB_H */
|
| Index: jpeglibmangler.h
|
| ===================================================================
|
| --- jpeglibmangler.h (revision 0)
|
| -+++ jpeglibmangler.h (revision 0)
|
| -@@ -0,0 +1,113 @@
|
| ++++ jpeglibmangler.h (working copy)
|
| +@@ -0,0 +1,114 @@
|
| +// Copyright (c) 2009 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| @@ -305,6 +10237,7 @@ Index: jpeglibmangler.h
|
| +#define jpeg_read_header chromium_jpeg_read_header
|
| +#define jpeg_start_decompress chromium_jpeg_start_decompress
|
| +#define jpeg_read_scanlines chromium_jpeg_read_scanlines
|
| ++#define jpeg_skip_scanlines chromium_jpeg_skip_scanlines
|
| +#define jpeg_finish_decompress chromium_jpeg_finish_decompress
|
| +#define jpeg_read_raw_data chromium_jpeg_read_raw_data
|
| +#define jpeg_has_multiple_scans chromium_jpeg_has_multiple_scans
|
| @@ -334,376 +10267,1367 @@ Index: jpeglibmangler.h
|
| +#define jpeg_mem_term chromium_jpeg_mem_term
|
| +
|
| +#endif // THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_
|
| -Index: simd/jcgrass2-64.asm
|
| -===================================================================
|
| ---- simd/jcgrass2-64.asm (revision 829)
|
| -+++ simd/jcgrass2-64.asm (working copy)
|
| -@@ -30,7 +30,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_rgb_gray_convert_sse2)
|
| -+ global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE
|
| -
|
| - EXTN(jconst_rgb_gray_convert_sse2):
|
| -
|
| -Index: simd/jiss2fst.asm
|
| +Index: jpegut.c
|
| ===================================================================
|
| ---- simd/jiss2fst.asm (revision 829)
|
| -+++ simd/jiss2fst.asm (working copy)
|
| -@@ -59,7 +59,7 @@
|
| - %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_idct_ifast_sse2)
|
| -+ global EXTN(jconst_idct_ifast_sse2) PRIVATE
|
| +--- jpegut.c (revision 829)
|
| ++++ jpegut.c (working copy)
|
| +@@ -19,11 +19,14 @@
|
| + #include "./rrtimer.h"
|
| + #include "./turbojpeg.h"
|
|
|
| - EXTN(jconst_idct_ifast_sse2):
|
| +-#define _catch(f) {if((f)==-1) {printf("TJPEG: %s\n", tjGetErrorStr()); goto finally;}}
|
| ++#define _catch(f) {if((f)==-1) {printf("TJPEG: %s\n", tjGetErrorStr()); bailout();}}
|
|
|
| -@@ -92,7 +92,7 @@
|
| - %define WK_NUM 2
|
| + const char *_subnamel[NUMSUBOPT]={"4:4:4", "4:2:2", "4:2:0", "GRAY"};
|
| + const char *_subnames[NUMSUBOPT]={"444", "422", "420", "GRAY"};
|
|
|
| - align 16
|
| -- global EXTN(jsimd_idct_ifast_sse2)
|
| -+ global EXTN(jsimd_idct_ifast_sse2) PRIVATE
|
| ++int exitstatus=0;
|
| ++#define bailout() {exitstatus=-1; goto finally;}
|
| ++
|
| + int pixels[9][3]=
|
| + {
|
| + {0, 255, 0},
|
| +@@ -70,7 +73,7 @@
|
| + }
|
| + }
|
|
|
| - EXTN(jsimd_idct_ifast_sse2):
|
| - push ebp
|
| -Index: simd/jcclrss2-64.asm
|
| +-int dumpbuf(unsigned char *buf, int w, int h, int ps, int flags)
|
| ++void dumpbuf(unsigned char *buf, int w, int h, int ps, int flags)
|
| + {
|
| + int roffset=(flags&TJ_BGR)?2:0, goffset=1, boffset=(flags&TJ_BGR)?0:2, i,
|
| + j;
|
| +@@ -177,12 +180,12 @@
|
| + if((outfile=fopen(filename, "wb"))==NULL)
|
| + {
|
| + printf("ERROR: Could not open %s for writing.\n", filename);
|
| +- goto finally;
|
| ++ bailout();
|
| + }
|
| + if(fwrite(jpegbuf, jpgbufsize, 1, outfile)!=1)
|
| + {
|
| + printf("ERROR: Could not write to %s.\n", filename);
|
| +- goto finally;
|
| ++ bailout();
|
| + }
|
| +
|
| + finally:
|
| +@@ -210,7 +213,7 @@
|
| +
|
| + if((bmpbuf=(unsigned char *)malloc(w*h*ps+1))==NULL)
|
| + {
|
| +- printf("ERROR: Could not allocate buffer\n"); goto finally;
|
| ++ printf("ERROR: Could not allocate buffer\n"); bailout();
|
| + }
|
| + initbuf(bmpbuf, w, h, ps, flags);
|
| + memset(jpegbuf, 0, TJBUFSIZE(w, h));
|
| +@@ -249,12 +252,12 @@
|
| + _catch(tjDecompressHeader(hnd, jpegbuf, jpegsize, &_w, &_h));
|
| + if(_w!=w || _h!=h)
|
| + {
|
| +- printf("Incorrect JPEG header\n"); goto finally;
|
| ++ printf("Incorrect JPEG header\n"); bailout();
|
| + }
|
| +
|
| + if((bmpbuf=(unsigned char *)malloc(w*h*ps+1))==NULL)
|
| + {
|
| +- printf("ERROR: Could not allocate buffer\n"); goto finally;
|
| ++ printf("ERROR: Could not allocate buffer\n"); bailout();
|
| + }
|
| + memset(bmpbuf, 0, w*ps*h);
|
| +
|
| +@@ -278,13 +281,13 @@
|
| +
|
| + if((jpegbuf=(unsigned char *)malloc(TJBUFSIZE(w, h))) == NULL)
|
| + {
|
| +- puts("ERROR: Could not allocate buffer."); goto finally;
|
| ++ puts("ERROR: Could not allocate buffer."); bailout();
|
| + }
|
| +
|
| + if((hnd=tjInitCompress())==NULL)
|
| +- {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); goto finally;}
|
| ++ {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); bailout();}
|
| + if((dhnd=tjInitDecompress())==NULL)
|
| +- {printf("Error in tjInitDecompress():\n%s\n", tjGetErrorStr()); goto finally;}
|
| ++ {printf("Error in tjInitDecompress():\n%s\n", tjGetErrorStr()); bailout();}
|
| +
|
| + gentestjpeg(hnd, jpegbuf, &size, w, h, ps, basefilename, subsamp, 100, 0);
|
| + gentestbmp(dhnd, jpegbuf, size, w, h, ps, basefilename, subsamp, 100, 0);
|
| +@@ -327,7 +330,7 @@
|
| + int i, j, i2; unsigned char *bmpbuf=NULL, *jpgbuf=NULL;
|
| + tjhandle hnd=NULL; unsigned long size;
|
| + if((hnd=tjInitCompress())==NULL)
|
| +- {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); goto finally;}
|
| ++ {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); bailout();}
|
| + printf("Buffer size regression test\n");
|
| + for(j=1; j<48; j++)
|
| + {
|
| +@@ -337,7 +340,7 @@
|
| + if((bmpbuf=(unsigned char *)malloc(i*j*4))==NULL
|
| + || (jpgbuf=(unsigned char *)malloc(TJBUFSIZE(i, j)))==NULL)
|
| + {
|
| +- printf("Memory allocation failure\n"); goto finally;
|
| ++ printf("Memory allocation failure\n"); bailout();
|
| + }
|
| + memset(bmpbuf, 0, i*j*4);
|
| + for(i2=0; i2<i*j; i2++)
|
| +@@ -353,7 +356,7 @@
|
| + if((bmpbuf=(unsigned char *)malloc(j*i*4))==NULL
|
| + || (jpgbuf=(unsigned char *)malloc(TJBUFSIZE(j, i)))==NULL)
|
| + {
|
| +- printf("Memory allocation failure\n"); goto finally;
|
| ++ printf("Memory allocation failure\n"); bailout();
|
| + }
|
| + for(i2=0; i2<j*i*4; i2++)
|
| + {
|
| +@@ -380,5 +383,5 @@
|
| + dotest(35, 41, 4, TJ_GRAYSCALE, "test");
|
| + dotest1();
|
| +
|
| +- return 0;
|
| ++ return exitstatus;
|
| + }
|
| +Index: jpgtest.cxx
|
| ===================================================================
|
| ---- simd/jcclrss2-64.asm (revision 829)
|
| -+++ simd/jcclrss2-64.asm (working copy)
|
| -@@ -37,7 +37,7 @@
|
| -
|
| - align 16
|
| -
|
| -- global EXTN(jsimd_rgb_ycc_convert_sse2)
|
| -+ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE
|
| -
|
| - EXTN(jsimd_rgb_ycc_convert_sse2):
|
| - push rbp
|
| -Index: simd/jiss2red-64.asm
|
| +--- jpgtest.cxx (revision 829)
|
| ++++ jpgtest.cxx (working copy)
|
| +@@ -322,22 +322,22 @@
|
| + if(!stricmp(argv[i], "-tile")) dotile=1;
|
| + if(!stricmp(argv[i], "-forcesse3"))
|
| + {
|
| +- printf("Using SSE3 code in Intel compressor\n");
|
| ++ printf("Using SSE3 code\n");
|
| + forcesse3=1;
|
| + }
|
| + if(!stricmp(argv[i], "-forcesse2"))
|
| + {
|
| +- printf("Using SSE2 code in Intel compressor\n");
|
| ++ printf("Using SSE2 code\n");
|
| + forcesse2=1;
|
| + }
|
| + if(!stricmp(argv[i], "-forcesse"))
|
| + {
|
| +- printf("Using SSE code in Intel compressor\n");
|
| ++ printf("Using SSE code\n");
|
| + forcesse=1;
|
| + }
|
| + if(!stricmp(argv[i], "-forcemmx"))
|
| + {
|
| +- printf("Using MMX code in Intel compressor\n");
|
| ++ printf("Using MMX code\n");
|
| + forcemmx=1;
|
| + }
|
| + if(!stricmp(argv[i], "-fastupsample"))
|
| +Index: jquant1.c
|
| ===================================================================
|
| ---- simd/jiss2red-64.asm (revision 829)
|
| -+++ simd/jiss2red-64.asm (working copy)
|
| -@@ -73,7 +73,7 @@
|
| - SECTION SEG_CONST
|
| +--- jquant1.c (revision 829)
|
| ++++ jquant1.c (working copy)
|
| +@@ -1,9 +1,10 @@
|
| + /*
|
| + * jquant1.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1996, Thomas G. Lane.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright (C) 2009, D. R. Commander
|
| +- * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains 1-pass color quantization (color mapping) routines.
|
| +Index: jquant2.c
|
| +===================================================================
|
| +--- jquant2.c (revision 829)
|
| ++++ jquant2.c (working copy)
|
| +@@ -1,9 +1,10 @@
|
| + /*
|
| + * jquant2.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1996, Thomas G. Lane.
|
| ++ * libjpeg-turbo Modifications:
|
| + * Copyright (C) 2009, D. R. Commander.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains 2-pass color quantization (color mapping) routines.
|
| +Index: jsimd.h
|
| +===================================================================
|
| +--- jsimd.h (revision 829)
|
| ++++ jsimd.h (working copy)
|
| +@@ -2,9 +2,11 @@
|
| + * jsimd.h
|
| + *
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++ * Copyright 2011 D. R. Commander
|
| + *
|
| + * Based on the x86 SIMD extension for IJG JPEG library,
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| ++ * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| + *
|
| + */
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_idct_red_sse2)
|
| -+ global EXTN(jconst_idct_red_sse2) PRIVATE
|
| +@@ -12,8 +14,10 @@
|
| +
|
| + #ifdef NEED_SHORT_EXTERNAL_NAMES
|
| + #define jsimd_can_rgb_ycc jSCanRgbYcc
|
| ++#define jsimd_can_rgb_gray jSCanRgbGry
|
| + #define jsimd_can_ycc_rgb jSCanYccRgb
|
| + #define jsimd_rgb_ycc_convert jSRgbYccConv
|
| ++#define jsimd_rgb_gray_convert jSRgbGryConv
|
| + #define jsimd_ycc_rgb_convert jSYccRgbConv
|
| + #define jsimd_can_h2v2_downsample jSCanH2V2Down
|
| + #define jsimd_can_h2v1_downsample jSCanH2V1Down
|
| +@@ -34,6 +38,7 @@
|
| + #endif /* NEED_SHORT_EXTERNAL_NAMES */
|
| +
|
| + EXTERN(int) jsimd_can_rgb_ycc JPP((void));
|
| ++EXTERN(int) jsimd_can_rgb_gray JPP((void));
|
| + EXTERN(int) jsimd_can_ycc_rgb JPP((void));
|
| +
|
| + EXTERN(void) jsimd_rgb_ycc_convert
|
| +@@ -40,6 +45,10 @@
|
| + JPP((j_compress_ptr cinfo,
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| + JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_rgb_gray_convert
|
| ++ JPP((j_compress_ptr cinfo,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| + EXTERN(void) jsimd_ycc_rgb_convert
|
| + JPP((j_decompress_ptr cinfo,
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| +Index: jsimd_none.c
|
| +===================================================================
|
| +--- jsimd_none.c (revision 829)
|
| ++++ jsimd_none.c (working copy)
|
| +@@ -2,10 +2,11 @@
|
| + * jsimd_none.c
|
| + *
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * Copyright 2009 D. R. Commander
|
| ++ * Copyright 2009-2011 D. R. Commander
|
| + *
|
| + * Based on the x86 SIMD extension for IJG JPEG library,
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| ++ * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| + *
|
| + * This file contains stubs for when there is no SIMD support available.
|
| + */
|
| +@@ -24,6 +25,12 @@
|
| + }
|
|
|
| - EXTN(jconst_idct_red_sse2):
|
| + GLOBAL(int)
|
| ++jsimd_can_rgb_gray (void)
|
| ++{
|
| ++ return 0;
|
| ++}
|
| ++
|
| ++GLOBAL(int)
|
| + jsimd_can_ycc_rgb (void)
|
| + {
|
| + return 0;
|
| +@@ -37,6 +44,13 @@
|
| + }
|
|
|
| -@@ -114,7 +114,7 @@
|
| - %define WK_NUM 2
|
| + GLOBAL(void)
|
| ++jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows)
|
| ++{
|
| ++}
|
| ++
|
| ++GLOBAL(void)
|
| + jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows)
|
| +Index: jsimddct.h
|
| +===================================================================
|
| +--- jsimddct.h (revision 829)
|
| ++++ jsimddct.h (working copy)
|
| +@@ -5,6 +5,7 @@
|
| + *
|
| + * Based on the x86 SIMD extension for IJG JPEG library,
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| ++ * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| + *
|
| + */
|
|
|
| - align 16
|
| -- global EXTN(jsimd_idct_4x4_sse2)
|
| -+ global EXTN(jsimd_idct_4x4_sse2) PRIVATE
|
| +Index: jversion.h
|
| +===================================================================
|
| +--- jversion.h (revision 829)
|
| ++++ jversion.h (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * jversion.h
|
| + *
|
| +- * Copyright (C) 1991-1998, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| ++ * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
|
| ++ * Modifications:
|
| ++ * Copyright (C) 2010, 2012-2014, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains software version identification.
|
| +@@ -9,6 +11,22 @@
|
| + */
|
|
|
| - EXTN(jsimd_idct_4x4_sse2):
|
| - push rbp
|
| -@@ -413,7 +413,7 @@
|
| - ; r13 = JDIMENSION output_col
|
|
|
| - align 16
|
| -- global EXTN(jsimd_idct_2x2_sse2)
|
| -+ global EXTN(jsimd_idct_2x2_sse2) PRIVATE
|
| ++#if JPEG_LIB_VERSION >= 80
|
| ++
|
| ++#define JVERSION "8d 15-Jan-2012"
|
| ++
|
| ++#elif JPEG_LIB_VERSION >= 70
|
| ++
|
| ++#define JVERSION "7 27-Jun-2009"
|
| ++
|
| ++#else
|
| ++
|
| + #define JVERSION "6b 27-Mar-1998"
|
|
|
| - EXTN(jsimd_idct_2x2_sse2):
|
| - push rbp
|
| -Index: simd/ji3dnflt.asm
|
| +-#define JCOPYRIGHT "Copyright (C) 1998, Thomas G. Lane"
|
| ++#endif
|
| ++
|
| ++#define JCOPYRIGHT "Copyright (C) 1991-2012 Thomas G. Lane, Guido Vollbeding\n" \
|
| ++ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
|
| ++ "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
|
| ++ "Copyright (C) 2009-2014 D. R. Commander\n" \
|
| ++ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)"
|
| +Index: rdbmp.c
|
| ===================================================================
|
| ---- simd/ji3dnflt.asm (revision 829)
|
| -+++ simd/ji3dnflt.asm (working copy)
|
| -@@ -27,7 +27,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_idct_float_3dnow)
|
| -+ global EXTN(jconst_idct_float_3dnow) PRIVATE
|
| +--- rdbmp.c (revision 829)
|
| ++++ rdbmp.c (working copy)
|
| +@@ -1,8 +1,11 @@
|
| + /*
|
| + * rdbmp.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1994-1996, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * Modified 2009-2010 by Guido Vollbeding.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Modified 2011 by Siarhei Siamashka.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains routines to read input images in Microsoft "BMP"
|
| +@@ -177,10 +180,41 @@
|
| + }
|
|
|
| - EXTN(jconst_idct_float_3dnow):
|
|
|
| -@@ -63,7 +63,7 @@
|
| - ; FAST_FLOAT workspace[DCTSIZE2]
|
| ++METHODDEF(JDIMENSION)
|
| ++get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
| ++/* This version is for reading 32-bit pixels */
|
| ++{
|
| ++ bmp_source_ptr source = (bmp_source_ptr) sinfo;
|
| ++ JSAMPARRAY image_ptr;
|
| ++ register JSAMPROW inptr, outptr;
|
| ++ register JDIMENSION col;
|
| ++
|
| ++ /* Fetch next row from virtual array */
|
| ++ source->source_row--;
|
| ++ image_ptr = (*cinfo->mem->access_virt_sarray)
|
| ++ ((j_common_ptr) cinfo, source->whole_image,
|
| ++ source->source_row, (JDIMENSION) 1, FALSE);
|
| ++ /* Transfer data. Note source values are in BGR order
|
| ++ * (even though Microsoft's own documents say the opposite).
|
| ++ */
|
| ++ inptr = image_ptr[0];
|
| ++ outptr = source->pub.buffer[0];
|
| ++ for (col = cinfo->image_width; col > 0; col--) {
|
| ++ outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */
|
| ++ outptr[1] = *inptr++;
|
| ++ outptr[0] = *inptr++;
|
| ++ inptr++; /* skip the 4th byte (Alpha channel) */
|
| ++ outptr += 3;
|
| ++ }
|
| ++
|
| ++ return 1;
|
| ++}
|
| ++
|
| ++
|
| + /*
|
| + * This method loads the image into whole_image during the first call on
|
| + * get_pixel_rows. The get_pixel_rows pointer is then adjusted to call
|
| +- * get_8bit_row or get_24bit_row on subsequent calls.
|
| ++ * get_8bit_row, get_24bit_row, or get_32bit_row on subsequent calls.
|
| + */
|
|
|
| - align 16
|
| -- global EXTN(jsimd_idct_float_3dnow)
|
| -+ global EXTN(jsimd_idct_float_3dnow) PRIVATE
|
| + METHODDEF(JDIMENSION)
|
| +@@ -188,10 +222,9 @@
|
| + {
|
| + bmp_source_ptr source = (bmp_source_ptr) sinfo;
|
| + register FILE *infile = source->pub.input_file;
|
| +- register int c;
|
| + register JSAMPROW out_ptr;
|
| + JSAMPARRAY image_ptr;
|
| +- JDIMENSION row, col;
|
| ++ JDIMENSION row;
|
| + cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
|
| +
|
| + /* Read the data into a virtual array in input-file row order. */
|
| +@@ -205,11 +238,11 @@
|
| + ((j_common_ptr) cinfo, source->whole_image,
|
| + row, (JDIMENSION) 1, TRUE);
|
| + out_ptr = image_ptr[0];
|
| +- for (col = source->row_width; col > 0; col--) {
|
| +- /* inline copy of read_byte() for speed */
|
| +- if ((c = getc(infile)) == EOF)
|
| +- ERREXIT(cinfo, JERR_INPUT_EOF);
|
| +- *out_ptr++ = (JSAMPLE) c;
|
| ++ if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) {
|
| ++ if (feof(infile))
|
| ++ ERREXIT(cinfo, JERR_INPUT_EOF);
|
| ++ else
|
| ++ ERREXIT(cinfo, JERR_FILE_READ);
|
| + }
|
| + }
|
| + if (progress != NULL)
|
| +@@ -223,6 +256,9 @@
|
| + case 24:
|
| + source->pub.get_pixel_rows = get_24bit_row;
|
| + break;
|
| ++ case 32:
|
| ++ source->pub.get_pixel_rows = get_32bit_row;
|
| ++ break;
|
| + default:
|
| + ERREXIT(cinfo, JERR_BMP_BADDEPTH);
|
| + }
|
| +@@ -251,8 +287,8 @@
|
| + (((INT32) UCH(array[offset+3])) << 24))
|
| + INT32 bfOffBits;
|
| + INT32 headerSize;
|
| +- INT32 biWidth = 0; /* initialize to avoid compiler warning */
|
| +- INT32 biHeight = 0;
|
| ++ INT32 biWidth;
|
| ++ INT32 biHeight;
|
| + unsigned int biPlanes;
|
| + INT32 biCompression;
|
| + INT32 biXPelsPerMeter,biYPelsPerMeter;
|
| +@@ -300,8 +336,6 @@
|
| + ERREXIT(cinfo, JERR_BMP_BADDEPTH);
|
| + break;
|
| + }
|
| +- if (biPlanes != 1)
|
| +- ERREXIT(cinfo, JERR_BMP_BADPLANES);
|
| + break;
|
| + case 40:
|
| + case 64:
|
| +@@ -325,12 +359,13 @@
|
| + case 24: /* RGB image */
|
| + TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight);
|
| + break;
|
| ++ case 32: /* RGB image + Alpha channel */
|
| ++ TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight);
|
| ++ break;
|
| + default:
|
| + ERREXIT(cinfo, JERR_BMP_BADDEPTH);
|
| + break;
|
| + }
|
| +- if (biPlanes != 1)
|
| +- ERREXIT(cinfo, JERR_BMP_BADPLANES);
|
| + if (biCompression != 0)
|
| + ERREXIT(cinfo, JERR_BMP_COMPRESSED);
|
| +
|
| +@@ -343,9 +378,14 @@
|
| + break;
|
| + default:
|
| + ERREXIT(cinfo, JERR_BMP_BADHEADER);
|
| +- break;
|
| ++ return;
|
| + }
|
|
|
| - EXTN(jsimd_idct_float_3dnow):
|
| - push ebp
|
| -Index: simd/jsimdcpu.asm
|
| ++ if (biWidth <= 0 || biHeight <= 0)
|
| ++ ERREXIT(cinfo, JERR_BMP_EMPTY);
|
| ++ if (biPlanes != 1)
|
| ++ ERREXIT(cinfo, JERR_BMP_BADPLANES);
|
| ++
|
| + /* Compute distance to bitmap data --- will adjust for colormap below */
|
| + bPad = bfOffBits - (headerSize + 14);
|
| +
|
| +@@ -375,6 +415,8 @@
|
| + /* Compute row width in file, including padding to 4-byte boundary */
|
| + if (source->bits_per_pixel == 24)
|
| + row_width = (JDIMENSION) (biWidth * 3);
|
| ++ else if (source->bits_per_pixel == 32)
|
| ++ row_width = (JDIMENSION) (biWidth * 4);
|
| + else
|
| + row_width = (JDIMENSION) biWidth;
|
| + while ((row_width & 3) != 0) row_width++;
|
| +Index: rdppm.c
|
| ===================================================================
|
| ---- simd/jsimdcpu.asm (revision 829)
|
| -+++ simd/jsimdcpu.asm (working copy)
|
| -@@ -29,7 +29,7 @@
|
| - ;
|
| -
|
| - align 16
|
| -- global EXTN(jpeg_simd_cpu_support)
|
| -+ global EXTN(jpeg_simd_cpu_support) PRIVATE
|
| -
|
| - EXTN(jpeg_simd_cpu_support):
|
| - push ebx
|
| -Index: simd/jdmerss2-64.asm
|
| +--- rdppm.c (revision 829)
|
| ++++ rdppm.c (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * rdppm.c
|
| + *
|
| + * Copyright (C) 1991-1997, Thomas G. Lane.
|
| ++ * Modified 2009 by Bill Allombert, Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -250,8 +251,8 @@
|
| + bufferptr = source->iobuffer;
|
| + for (col = cinfo->image_width; col > 0; col--) {
|
| + register int temp;
|
| +- temp = UCH(*bufferptr++);
|
| +- temp |= UCH(*bufferptr++) << 8;
|
| ++ temp = UCH(*bufferptr++) << 8;
|
| ++ temp |= UCH(*bufferptr++);
|
| + *ptr++ = rescale[temp];
|
| + }
|
| + return 1;
|
| +@@ -274,14 +275,14 @@
|
| + bufferptr = source->iobuffer;
|
| + for (col = cinfo->image_width; col > 0; col--) {
|
| + register int temp;
|
| +- temp = UCH(*bufferptr++);
|
| +- temp |= UCH(*bufferptr++) << 8;
|
| ++ temp = UCH(*bufferptr++) << 8;
|
| ++ temp |= UCH(*bufferptr++);
|
| + *ptr++ = rescale[temp];
|
| +- temp = UCH(*bufferptr++);
|
| +- temp |= UCH(*bufferptr++) << 8;
|
| ++ temp = UCH(*bufferptr++) << 8;
|
| ++ temp |= UCH(*bufferptr++);
|
| + *ptr++ = rescale[temp];
|
| +- temp = UCH(*bufferptr++);
|
| +- temp |= UCH(*bufferptr++) << 8;
|
| ++ temp = UCH(*bufferptr++) << 8;
|
| ++ temp |= UCH(*bufferptr++);
|
| + *ptr++ = rescale[temp];
|
| + }
|
| + return 1;
|
| +Index: rdswitch.c
|
| ===================================================================
|
| ---- simd/jdmerss2-64.asm (revision 829)
|
| -+++ simd/jdmerss2-64.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - SECTION SEG_CONST
|
| +--- rdswitch.c (revision 829)
|
| ++++ rdswitch.c (working copy)
|
| +@@ -1,8 +1,10 @@
|
| + /*
|
| + * rdswitch.c
|
| + *
|
| ++ * This file was part of the Independent JPEG Group's software:
|
| + * Copyright (C) 1991-1996, Thomas G. Lane.
|
| +- * This file is part of the Independent JPEG Group's software.
|
| ++ * libjpeg-turbo Modifications:
|
| ++ * Copyright (C) 2010, D. R. Commander.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| + * This file contains routines to process some of cjpeg's more complicated
|
| +@@ -9,6 +11,7 @@
|
| + * command-line switches. Switches processed here are:
|
| + * -qtables file Read quantization tables from text file
|
| + * -scans file Read scan script from text file
|
| ++ * -quality N[,N,...] Set quality ratings
|
| + * -qslots N[,N,...] Set component quantization table selectors
|
| + * -sample HxV[,HxV,...] Set component sampling factors
|
| + */
|
| +@@ -69,9 +72,12 @@
|
| + }
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_merged_upsample_sse2)
|
| -+ global EXTN(jconst_merged_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jconst_merged_upsample_sse2):
|
| ++#if JPEG_LIB_VERSION < 70
|
| ++static int q_scale_factor[NUM_QUANT_TBLS] = {100, 100, 100, 100};
|
| ++#endif
|
| ++
|
| + GLOBAL(boolean)
|
| +-read_quant_tables (j_compress_ptr cinfo, char * filename,
|
| +- int scale_factor, boolean force_baseline)
|
| ++read_quant_tables (j_compress_ptr cinfo, char * filename, boolean force_baseline)
|
| + /* Read a set of quantization tables from the specified file.
|
| + * The file is plain ASCII text: decimal numbers with whitespace between.
|
| + * Comments preceded by '#' may be included in the file.
|
| +@@ -108,7 +114,13 @@
|
| + }
|
| + table[i] = (unsigned int) val;
|
| + }
|
| +- jpeg_add_quant_table(cinfo, tblno, table, scale_factor, force_baseline);
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno],
|
| ++ force_baseline);
|
| ++#else
|
| ++ jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno],
|
| ++ force_baseline);
|
| ++#endif
|
| + tblno++;
|
| + }
|
|
|
| -Index: simd/jdsammmx.asm
|
| -===================================================================
|
| ---- simd/jdsammmx.asm (revision 829)
|
| -+++ simd/jdsammmx.asm (working copy)
|
| -@@ -22,7 +22,7 @@
|
| - SECTION SEG_CONST
|
| +@@ -262,7 +274,85 @@
|
| + #endif /* C_MULTISCAN_FILES_SUPPORTED */
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_fancy_upsample_mmx)
|
| -+ global EXTN(jconst_fancy_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jconst_fancy_upsample_mmx):
|
| ++#if JPEG_LIB_VERSION < 70
|
| ++/* These are the sample quantization tables given in JPEG spec section K.1.
|
| ++ * The spec says that the values given produce "good" quality, and
|
| ++ * when divided by 2, "very good" quality.
|
| ++ */
|
| ++static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
|
| ++ 16, 11, 10, 16, 24, 40, 51, 61,
|
| ++ 12, 12, 14, 19, 26, 58, 60, 55,
|
| ++ 14, 13, 16, 24, 40, 57, 69, 56,
|
| ++ 14, 17, 22, 29, 51, 87, 80, 62,
|
| ++ 18, 22, 37, 56, 68, 109, 103, 77,
|
| ++ 24, 35, 55, 64, 81, 104, 113, 92,
|
| ++ 49, 64, 78, 87, 103, 121, 120, 101,
|
| ++ 72, 92, 95, 98, 112, 100, 103, 99
|
| ++};
|
| ++static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
|
| ++ 17, 18, 24, 47, 99, 99, 99, 99,
|
| ++ 18, 21, 26, 66, 99, 99, 99, 99,
|
| ++ 24, 26, 56, 99, 99, 99, 99, 99,
|
| ++ 47, 66, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99,
|
| ++ 99, 99, 99, 99, 99, 99, 99, 99
|
| ++};
|
| ++
|
| ++
|
| ++LOCAL(void)
|
| ++jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
|
| ++{
|
| ++ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
|
| ++ q_scale_factor[0], force_baseline);
|
| ++ jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
|
| ++ q_scale_factor[1], force_baseline);
|
| ++}
|
| ++#endif
|
| ++
|
| ++
|
| + GLOBAL(boolean)
|
| ++set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline)
|
| ++/* Process a quality-ratings parameter string, of the form
|
| ++ * N[,N,...]
|
| ++ * If there are more q-table slots than parameters, the last value is replicated.
|
| ++ */
|
| ++{
|
| ++ int val = 75; /* default value */
|
| ++ int tblno;
|
| ++ char ch;
|
| ++
|
| ++ for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
|
| ++ if (*arg) {
|
| ++ ch = ','; /* if not set by sscanf, will be ',' */
|
| ++ if (sscanf(arg, "%d%c", &val, &ch) < 1)
|
| ++ return FALSE;
|
| ++ if (ch != ',') /* syntax check */
|
| ++ return FALSE;
|
| ++ /* Convert user 0-100 rating to percentage scaling */
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
|
| ++#else
|
| ++ q_scale_factor[tblno] = jpeg_quality_scaling(val);
|
| ++#endif
|
| ++ while (*arg && *arg++ != ',') /* advance to next segment of arg string */
|
| ++ ;
|
| ++ } else {
|
| ++ /* reached end of parameter, set remaining factors to last value */
|
| ++#if JPEG_LIB_VERSION >= 70
|
| ++ cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
|
| ++#else
|
| ++ q_scale_factor[tblno] = jpeg_quality_scaling(val);
|
| ++#endif
|
| ++ }
|
| ++ }
|
| ++ jpeg_default_qtables(cinfo, force_baseline);
|
| ++ return TRUE;
|
| ++}
|
| ++
|
| ++
|
| ++GLOBAL(boolean)
|
| + set_quant_slots (j_compress_ptr cinfo, char *arg)
|
| + /* Process a quantization-table-selectors parameter string, of the form
|
| + * N[,N,...]
|
| +Index: rrutil.h
|
| +===================================================================
|
| +--- rrutil.h (revision 829)
|
| ++++ rrutil.h (working copy)
|
| +@@ -1,5 +1,6 @@
|
| + /* Copyright (C)2004 Landmark Graphics Corporation
|
| + * Copyright (C)2005 Sun Microsystems, Inc.
|
| ++ * Copyright (C)2010 D. R. Commander
|
| + *
|
| + * This library is free software and may be redistributed and/or modified under
|
| + * the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
| +@@ -47,9 +48,9 @@
|
| + static __inline int numprocs(void)
|
| + {
|
| + #ifdef _WIN32
|
| +- DWORD ProcAff, SysAff, i; int count=0;
|
| ++ DWORD_PTR ProcAff, SysAff, i; int count=0;
|
| + if(!GetProcessAffinityMask(GetCurrentProcess(), &ProcAff, &SysAff)) return(1);
|
| +- for(i=0; i<32; i++) if(ProcAff&(1<<i)) count++;
|
| ++ for(i=0; i<sizeof(long*)*8; i++) if(ProcAff&(1LL<<i)) count++;
|
| + return(count);
|
| + #elif defined (__APPLE__)
|
| + return(1);
|
| +Index: simd/jcclrmmx.asm
|
| +===================================================================
|
| +--- simd/jcclrmmx.asm (revision 829)
|
| ++++ simd/jcclrmmx.asm (working copy)
|
| +@@ -19,8 +19,6 @@
|
| + %include "jcolsamp.inc"
|
|
|
| -@@ -58,7 +58,7 @@
|
| - %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 32
|
| + ;
|
| + ; Convert some rows of samples to the output colorspace.
|
| + ;
|
| +@@ -42,7 +40,7 @@
|
| + %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v1_fancy_upsample_mmx)
|
| -+ global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE
|
| +- global EXTN(jsimd_rgb_ycc_convert_mmx)
|
| ++ global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_fancy_upsample_mmx):
|
| + EXTN(jsimd_rgb_ycc_convert_mmx):
|
| push ebp
|
| -@@ -216,7 +216,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| +@@ -474,3 +472,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v2_fancy_upsample_mmx)
|
| -+ global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcclrss2-64.asm
|
| +===================================================================
|
| +--- simd/jcclrss2-64.asm (revision 829)
|
| ++++ simd/jcclrss2-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jcclrss2.asm - colorspace conversion (64-bit SSE2)
|
| ++; jcclrss2-64.asm - colorspace conversion (64-bit SSE2)
|
| + ;
|
| + ; x86 SIMD extension for IJG JPEG library
|
| + ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| +@@ -17,8 +17,6 @@
|
| + %include "jcolsamp.inc"
|
|
|
| - EXTN(jsimd_h2v2_fancy_upsample_mmx):
|
| - push ebp
|
| -@@ -542,7 +542,7 @@
|
| - %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 64
|
| + ;
|
| + ; Convert some rows of samples to the output colorspace.
|
| + ;
|
| +@@ -39,7 +37,7 @@
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v1_upsample_mmx)
|
| -+ global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_upsample_mmx):
|
| - push ebp
|
| -@@ -643,7 +643,7 @@
|
| - %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
| +- global EXTN(jsimd_rgb_ycc_convert_sse2)
|
| ++ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_rgb_ycc_convert_sse2):
|
| + push rbp
|
| +@@ -49,8 +47,8 @@
|
| + mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [wk(0)]
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + mov rcx, r10
|
| + test rcx,rcx
|
| +@@ -70,7 +68,7 @@
|
| + pop rcx
|
| +
|
| + mov rsi, r11
|
| +- mov rax, r14
|
| ++ mov eax, r14d
|
| + test rax,rax
|
| + jle near .return
|
| + .rowloop:
|
| +@@ -475,10 +473,13 @@
|
| + jg near .rowloop
|
| +
|
| + .return:
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + mov rsp,rbp ; rsp <- aligned rbp
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcclrss2.asm
|
| +===================================================================
|
| +--- simd/jcclrss2.asm (revision 829)
|
| ++++ simd/jcclrss2.asm (working copy)
|
| +@@ -16,8 +16,6 @@
|
| + %include "jcolsamp.inc"
|
| +
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 32
|
| + ;
|
| + ; Convert some rows of samples to the output colorspace.
|
| + ;
|
| +@@ -40,7 +38,7 @@
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v2_upsample_mmx)
|
| -+ global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_h2v2_upsample_mmx):
|
| +- global EXTN(jsimd_rgb_ycc_convert_sse2)
|
| ++ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_rgb_ycc_convert_sse2):
|
| push ebp
|
| -Index: simd/jdmrgmmx.asm
|
| +@@ -500,3 +498,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jccolmmx.asm
|
| ===================================================================
|
| ---- simd/jdmrgmmx.asm (revision 829)
|
| -+++ simd/jdmrgmmx.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| +--- simd/jccolmmx.asm (revision 829)
|
| ++++ simd/jccolmmx.asm (working copy)
|
| +@@ -37,7 +37,7 @@
|
| + SECTION SEG_CONST
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v1_merged_upsample_mmx)
|
| -+ global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE
|
| + alignz 16
|
| +- global EXTN(jconst_rgb_ycc_convert_mmx)
|
| ++ global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_merged_upsample_mmx):
|
| - push ebp
|
| -@@ -409,7 +409,7 @@
|
| - %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
| + EXTN(jconst_rgb_ycc_convert_mmx):
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v2_merged_upsample_mmx)
|
| -+ global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE
|
| +@@ -51,6 +51,9 @@
|
| + alignz 16
|
|
|
| - EXTN(jsimd_h2v2_merged_upsample_mmx):
|
| - push ebp
|
| -Index: simd/jdsamss2.asm
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 32
|
| ++
|
| + %include "jcclrmmx.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -57,10 +60,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx
|
| + %include "jcclrmmx.asm"
|
| +
|
| +@@ -68,10 +71,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx
|
| + %include "jcclrmmx.asm"
|
| +
|
| +@@ -79,10 +82,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx
|
| + %include "jcclrmmx.asm"
|
| +
|
| +@@ -90,10 +93,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx
|
| + %include "jcclrmmx.asm"
|
| +
|
| +@@ -101,10 +104,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx
|
| + %include "jcclrmmx.asm"
|
| +
|
| +@@ -112,9 +115,9 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx
|
| + %include "jcclrmmx.asm"
|
| +Index: simd/jccolss2-64.asm
|
| ===================================================================
|
| ---- simd/jdsamss2.asm (revision 829)
|
| -+++ simd/jdsamss2.asm (working copy)
|
| -@@ -22,7 +22,7 @@
|
| +--- simd/jccolss2-64.asm (revision 829)
|
| ++++ simd/jccolss2-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jccolss2.asm - colorspace conversion (64-bit SSE2)
|
| ++; jccolss2-64.asm - colorspace conversion (64-bit SSE2)
|
| + ;
|
| + ; x86 SIMD extension for IJG JPEG library
|
| + ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| +@@ -34,7 +34,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_fancy_upsample_sse2)
|
| -+ global EXTN(jconst_fancy_upsample_sse2) PRIVATE
|
| +- global EXTN(jconst_rgb_ycc_convert_sse2)
|
| ++ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE
|
|
|
| - EXTN(jconst_fancy_upsample_sse2):
|
| + EXTN(jconst_rgb_ycc_convert_sse2):
|
|
|
| -@@ -58,7 +58,7 @@
|
| - %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v1_fancy_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 64
|
| ++
|
| + %include "jcclrss2-64.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
|
| + %include "jcclrss2-64.asm"
|
| +
|
| +@@ -65,10 +68,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
|
| + %include "jcclrss2-64.asm"
|
| +
|
| +@@ -76,10 +79,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
|
| + %include "jcclrss2-64.asm"
|
| +
|
| +@@ -87,10 +90,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
|
| + %include "jcclrss2-64.asm"
|
| +
|
| +@@ -98,10 +101,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
|
| + %include "jcclrss2-64.asm"
|
| +
|
| +@@ -109,9 +112,9 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
|
| + %include "jcclrss2-64.asm"
|
| +Index: simd/jccolss2.asm
|
| +===================================================================
|
| +--- simd/jccolss2.asm (revision 829)
|
| ++++ simd/jccolss2.asm (working copy)
|
| +@@ -34,7 +34,7 @@
|
| + SECTION SEG_CONST
|
|
|
| - EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
| - push ebp
|
| -@@ -214,7 +214,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| + alignz 16
|
| +- global EXTN(jconst_rgb_ycc_convert_sse2)
|
| ++ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v2_fancy_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE
|
| + EXTN(jconst_rgb_ycc_convert_sse2):
|
|
|
| - EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
| - push ebp
|
| -@@ -538,7 +538,7 @@
|
| - %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
| +
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 32
|
| ++
|
| + %include "jcclrss2.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
|
| + %include "jcclrss2.asm"
|
| +
|
| +@@ -65,10 +68,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
|
| + %include "jcclrss2.asm"
|
| +
|
| +@@ -76,10 +79,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
|
| + %include "jcclrss2.asm"
|
| +
|
| +@@ -87,10 +90,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
|
| + %include "jcclrss2.asm"
|
| +
|
| +@@ -98,10 +101,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
|
| + %include "jcclrss2.asm"
|
| +
|
| +@@ -109,9 +112,9 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
|
| + %include "jcclrss2.asm"
|
| +Index: simd/jcqnt3dn.asm
|
| +===================================================================
|
| +--- simd/jcqnt3dn.asm (revision 829)
|
| ++++ simd/jcqnt3dn.asm (working copy)
|
| +@@ -35,7 +35,7 @@
|
| + %define workspace ebp+16 ; FAST_FLOAT * workspace
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v1_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE
|
| +- global EXTN(jsimd_convsamp_float_3dnow)
|
| ++ global EXTN(jsimd_convsamp_float_3dnow) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_upsample_sse2):
|
| + EXTN(jsimd_convsamp_float_3dnow):
|
| push ebp
|
| -@@ -637,7 +637,7 @@
|
| - %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
| +@@ -138,7 +138,7 @@
|
| + %define workspace ebp+16 ; FAST_FLOAT * workspace
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v2_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE
|
| +- global EXTN(jsimd_quantize_float_3dnow)
|
| ++ global EXTN(jsimd_quantize_float_3dnow) PRIVATE
|
|
|
| - EXTN(jsimd_h2v2_upsample_sse2):
|
| + EXTN(jsimd_quantize_float_3dnow):
|
| push ebp
|
| -Index: simd/jiss2flt-64.asm
|
| -===================================================================
|
| ---- simd/jiss2flt-64.asm (revision 829)
|
| -+++ simd/jiss2flt-64.asm (working copy)
|
| -@@ -38,7 +38,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_idct_float_sse2)
|
| -+ global EXTN(jconst_idct_float_sse2) PRIVATE
|
| +@@ -228,3 +228,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - EXTN(jconst_idct_float_sse2):
|
| -
|
| -@@ -74,7 +74,7 @@
|
| - ; FAST_FLOAT workspace[DCTSIZE2]
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_idct_float_sse2)
|
| -+ global EXTN(jsimd_idct_float_sse2) PRIVATE
|
| -
|
| - EXTN(jsimd_idct_float_sse2):
|
| - push rbp
|
| -Index: simd/jfss2int-64.asm
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcqntmmx.asm
|
| ===================================================================
|
| ---- simd/jfss2int-64.asm (revision 829)
|
| -+++ simd/jfss2int-64.asm (working copy)
|
| -@@ -67,7 +67,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_fdct_islow_sse2)
|
| -+ global EXTN(jconst_fdct_islow_sse2) PRIVATE
|
| +--- simd/jcqntmmx.asm (revision 829)
|
| ++++ simd/jcqntmmx.asm (working copy)
|
| +@@ -35,7 +35,7 @@
|
| + %define workspace ebp+16 ; DCTELEM * workspace
|
|
|
| - EXTN(jconst_fdct_islow_sse2):
|
| + align 16
|
| +- global EXTN(jsimd_convsamp_mmx)
|
| ++ global EXTN(jsimd_convsamp_mmx) PRIVATE
|
|
|
| -@@ -101,7 +101,7 @@
|
| - %define WK_NUM 6
|
| + EXTN(jsimd_convsamp_mmx):
|
| + push ebp
|
| +@@ -140,7 +140,7 @@
|
| + %define workspace ebp+16 ; DCTELEM * workspace
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_islow_sse2)
|
| -+ global EXTN(jsimd_fdct_islow_sse2) PRIVATE
|
| +- global EXTN(jsimd_quantize_mmx)
|
| ++ global EXTN(jsimd_quantize_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_fdct_islow_sse2):
|
| - push rbp
|
| -Index: simd/jcqnts2f.asm
|
| + EXTN(jsimd_quantize_mmx):
|
| + push ebp
|
| +@@ -269,3 +269,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcqnts2f-64.asm
|
| ===================================================================
|
| ---- simd/jcqnts2f.asm (revision 829)
|
| -+++ simd/jcqnts2f.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - %define workspace ebp+16 ; FAST_FLOAT * workspace
|
| +--- simd/jcqnts2f-64.asm (revision 829)
|
| ++++ simd/jcqnts2f-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jcqnts2f.asm - sample data conversion and quantization (64-bit SSE & SSE2)
|
| ++; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -36,13 +36,14 @@
|
| + ; r12 = FAST_FLOAT * workspace
|
|
|
| align 16
|
| - global EXTN(jsimd_convsamp_float_sse2)
|
| + global EXTN(jsimd_convsamp_float_sse2) PRIVATE
|
|
|
| EXTN(jsimd_convsamp_float_sse2):
|
| - push ebp
|
| -@@ -115,7 +115,7 @@
|
| - %define workspace ebp+16 ; FAST_FLOAT * workspace
|
| + push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + pcmpeqw xmm7,xmm7
|
| + psllw xmm7,7
|
| +@@ -89,8 +90,8 @@
|
| + dec rcx
|
| + jnz short .convloop
|
| +
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + pop rbp
|
| + ret
|
| +
|
| +@@ -109,10 +110,11 @@
|
| + ; r12 = FAST_FLOAT * workspace
|
|
|
| align 16
|
| - global EXTN(jsimd_quantize_float_sse2)
|
| + global EXTN(jsimd_quantize_float_sse2) PRIVATE
|
|
|
| EXTN(jsimd_quantize_float_sse2):
|
| - push ebp
|
| -Index: simd/jdmrgss2.asm
|
| + push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| + collect_args
|
| +
|
| +@@ -150,3 +152,7 @@
|
| + uncollect_args
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcqnts2f.asm
|
| ===================================================================
|
| ---- simd/jdmrgss2.asm (revision 829)
|
| -+++ simd/jdmrgss2.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_h2v1_merged_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE
|
| -
|
| - EXTN(jsimd_h2v1_merged_upsample_sse2):
|
| - push ebp
|
| -@@ -560,7 +560,7 @@
|
| - %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
| +--- simd/jcqnts2f.asm (revision 829)
|
| ++++ simd/jcqnts2f.asm (working copy)
|
| +@@ -35,7 +35,7 @@
|
| + %define workspace ebp+16 ; FAST_FLOAT * workspace
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v2_merged_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE
|
| +- global EXTN(jsimd_convsamp_float_sse2)
|
| ++ global EXTN(jsimd_convsamp_float_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_h2v2_merged_upsample_sse2):
|
| + EXTN(jsimd_convsamp_float_sse2):
|
| push ebp
|
| -Index: simd/jfmmxint.asm
|
| -===================================================================
|
| ---- simd/jfmmxint.asm (revision 829)
|
| -+++ simd/jfmmxint.asm (working copy)
|
| -@@ -66,7 +66,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_fdct_islow_mmx)
|
| -+ global EXTN(jconst_fdct_islow_mmx) PRIVATE
|
| -
|
| - EXTN(jconst_fdct_islow_mmx):
|
| -
|
| -@@ -101,7 +101,7 @@
|
| - %define WK_NUM 2
|
| +@@ -115,7 +115,7 @@
|
| + %define workspace ebp+16 ; FAST_FLOAT * workspace
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_islow_mmx)
|
| -+ global EXTN(jsimd_fdct_islow_mmx) PRIVATE
|
| +- global EXTN(jsimd_quantize_float_sse2)
|
| ++ global EXTN(jsimd_quantize_float_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_fdct_islow_mmx):
|
| + EXTN(jsimd_quantize_float_sse2):
|
| push ebp
|
| -Index: simd/jcgryss2-64.asm
|
| +@@ -166,3 +166,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcqnts2i-64.asm
|
| ===================================================================
|
| ---- simd/jcgryss2-64.asm (revision 829)
|
| -+++ simd/jcgryss2-64.asm (working copy)
|
| -@@ -37,7 +37,7 @@
|
| +--- simd/jcqnts2i-64.asm (revision 829)
|
| ++++ simd/jcqnts2i-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jcqnts2i.asm - sample data conversion and quantization (64-bit SSE2)
|
| ++; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -36,13 +36,14 @@
|
| + ; r12 = DCTELEM * workspace
|
|
|
| align 16
|
| +- global EXTN(jsimd_convsamp_sse2)
|
| ++ global EXTN(jsimd_convsamp_sse2) PRIVATE
|
|
|
| -- global EXTN(jsimd_rgb_gray_convert_sse2)
|
| -+ global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE
|
| + EXTN(jsimd_convsamp_sse2):
|
| + push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + pxor xmm6,xmm6 ; xmm6=(all 0's)
|
| + pcmpeqw xmm7,xmm7
|
| +@@ -84,8 +85,8 @@
|
| + dec rcx
|
| + jnz short .convloop
|
| +
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + pop rbp
|
| + ret
|
| +
|
| +@@ -111,10 +112,11 @@
|
| + ; r12 = DCTELEM * workspace
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_quantize_sse2)
|
| ++ global EXTN(jsimd_quantize_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_rgb_gray_convert_sse2):
|
| + EXTN(jsimd_quantize_sse2):
|
| push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| + collect_args
|
| +
|
| +@@ -179,3 +181,7 @@
|
| + uncollect_args
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jcqnts2i.asm
|
| ===================================================================
|
| --- simd/jcqnts2i.asm (revision 829)
|
| @@ -726,112 +11650,92 @@ Index: simd/jcqnts2i.asm
|
|
|
| EXTN(jsimd_quantize_sse2):
|
| push ebp
|
| -Index: simd/jiss2fst-64.asm
|
| -===================================================================
|
| ---- simd/jiss2fst-64.asm (revision 829)
|
| -+++ simd/jiss2fst-64.asm (working copy)
|
| -@@ -60,7 +60,7 @@
|
| - %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_idct_ifast_sse2)
|
| -+ global EXTN(jconst_idct_ifast_sse2) PRIVATE
|
| -
|
| - EXTN(jconst_idct_ifast_sse2):
|
| -
|
| -@@ -93,7 +93,7 @@
|
| - %define WK_NUM 2
|
| +@@ -195,3 +195,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - align 16
|
| -- global EXTN(jsimd_idct_ifast_sse2)
|
| -+ global EXTN(jsimd_idct_ifast_sse2) PRIVATE
|
| -
|
| - EXTN(jsimd_idct_ifast_sse2):
|
| - push rbp
|
| -Index: simd/jiss2flt.asm
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcqntsse.asm
|
| ===================================================================
|
| ---- simd/jiss2flt.asm (revision 829)
|
| -+++ simd/jiss2flt.asm (working copy)
|
| -@@ -37,7 +37,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_idct_float_sse2)
|
| -+ global EXTN(jconst_idct_float_sse2) PRIVATE
|
| -
|
| - EXTN(jconst_idct_float_sse2):
|
| -
|
| -@@ -73,7 +73,7 @@
|
| - ; FAST_FLOAT workspace[DCTSIZE2]
|
| +--- simd/jcqntsse.asm (revision 829)
|
| ++++ simd/jcqntsse.asm (working copy)
|
| +@@ -35,7 +35,7 @@
|
| + %define workspace ebp+16 ; FAST_FLOAT * workspace
|
|
|
| align 16
|
| -- global EXTN(jsimd_idct_float_sse2)
|
| -+ global EXTN(jsimd_idct_float_sse2) PRIVATE
|
| +- global EXTN(jsimd_convsamp_float_sse)
|
| ++ global EXTN(jsimd_convsamp_float_sse) PRIVATE
|
|
|
| - EXTN(jsimd_idct_float_sse2):
|
| + EXTN(jsimd_convsamp_float_sse):
|
| push ebp
|
| -Index: simd/jiss2int.asm
|
| -===================================================================
|
| ---- simd/jiss2int.asm (revision 829)
|
| -+++ simd/jiss2int.asm (working copy)
|
| -@@ -66,7 +66,7 @@
|
| - SECTION SEG_CONST
|
| +@@ -138,7 +138,7 @@
|
| + %define workspace ebp+16 ; FAST_FLOAT * workspace
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_idct_islow_sse2)
|
| -+ global EXTN(jconst_idct_islow_sse2) PRIVATE
|
| + align 16
|
| +- global EXTN(jsimd_quantize_float_sse)
|
| ++ global EXTN(jsimd_quantize_float_sse) PRIVATE
|
|
|
| - EXTN(jconst_idct_islow_sse2):
|
| + EXTN(jsimd_quantize_float_sse):
|
| + push ebp
|
| +@@ -206,3 +206,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| -@@ -105,7 +105,7 @@
|
| - %define WK_NUM 12
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcsammmx.asm
|
| +===================================================================
|
| +--- simd/jcsammmx.asm (revision 829)
|
| ++++ simd/jcsammmx.asm (working copy)
|
| +@@ -40,7 +40,7 @@
|
| + %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
|
|
| align 16
|
| -- global EXTN(jsimd_idct_islow_sse2)
|
| -+ global EXTN(jsimd_idct_islow_sse2) PRIVATE
|
| +- global EXTN(jsimd_h2v1_downsample_mmx)
|
| ++ global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_idct_islow_sse2):
|
| + EXTN(jsimd_h2v1_downsample_mmx):
|
| push ebp
|
| -Index: simd/jfsseflt-64.asm
|
| -===================================================================
|
| ---- simd/jfsseflt-64.asm (revision 829)
|
| -+++ simd/jfsseflt-64.asm (working copy)
|
| -@@ -38,7 +38,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_fdct_float_sse)
|
| -+ global EXTN(jconst_fdct_float_sse) PRIVATE
|
| +@@ -95,7 +95,7 @@
|
|
|
| - EXTN(jconst_fdct_float_sse):
|
| + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
|
| + test eax,eax
|
| +- jle short .return
|
| ++ jle near .return
|
|
|
| -@@ -65,7 +65,7 @@
|
| - %define WK_NUM 2
|
| + mov edx, 0x00010000 ; bias pattern
|
| + movd mm7,edx
|
| +@@ -182,7 +182,7 @@
|
| + %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_float_sse)
|
| -+ global EXTN(jsimd_fdct_float_sse) PRIVATE
|
| -
|
| - EXTN(jsimd_fdct_float_sse):
|
| - push rbp
|
| -Index: simd/jccolss2-64.asm
|
| -===================================================================
|
| ---- simd/jccolss2-64.asm (revision 829)
|
| -+++ simd/jccolss2-64.asm (working copy)
|
| -@@ -34,7 +34,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_rgb_ycc_convert_sse2)
|
| -+ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE
|
| +- global EXTN(jsimd_h2v2_downsample_mmx)
|
| ++ global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE
|
|
|
| - EXTN(jconst_rgb_ycc_convert_sse2):
|
| + EXTN(jsimd_h2v2_downsample_mmx):
|
| + push ebp
|
| +@@ -319,3 +319,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jcsamss2-64.asm
|
| ===================================================================
|
| --- simd/jcsamss2-64.asm (revision 829)
|
| +++ simd/jcsamss2-64.asm (working copy)
|
| -@@ -41,7 +41,7 @@
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jcsamss2.asm - downsampling (64-bit SSE2)
|
| ++; jcsamss2-64.asm - downsampling (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -41,10 +41,11 @@
|
| ; r15 = JSAMPARRAY output_data
|
|
|
| align 16
|
| @@ -840,7 +11744,11 @@ Index: simd/jcsamss2-64.asm
|
|
|
| EXTN(jsimd_h2v1_downsample_sse2):
|
| push rbp
|
| -@@ -185,7 +185,7 @@
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| + collect_args
|
| +
|
| +@@ -184,10 +185,11 @@
|
| ; r15 = JSAMPARRAY output_data
|
|
|
| align 16
|
| @@ -849,11 +11757,102 @@ Index: simd/jcsamss2-64.asm
|
|
|
| EXTN(jsimd_h2v2_downsample_sse2):
|
| push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| + collect_args
|
| +
|
| +@@ -322,3 +324,7 @@
|
| + uncollect_args
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jcsamss2.asm
|
| +===================================================================
|
| +--- simd/jcsamss2.asm (revision 829)
|
| ++++ simd/jcsamss2.asm (working copy)
|
| +@@ -40,7 +40,7 @@
|
| + %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_h2v1_downsample_sse2)
|
| ++ global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_h2v1_downsample_sse2):
|
| + push ebp
|
| +@@ -195,7 +195,7 @@
|
| + %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_h2v2_downsample_sse2)
|
| ++ global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_h2v2_downsample_sse2):
|
| + push ebp
|
| +@@ -346,3 +346,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdclrmmx.asm
|
| +===================================================================
|
| +--- simd/jdclrmmx.asm (revision 829)
|
| ++++ simd/jdclrmmx.asm (working copy)
|
| +@@ -19,8 +19,6 @@
|
| + %include "jcolsamp.inc"
|
| +
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 32
|
| + ;
|
| + ; Convert some rows of samples to the output colorspace.
|
| + ;
|
| +@@ -42,7 +40,7 @@
|
| + %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_ycc_rgb_convert_mmx)
|
| ++ global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE
|
| +
|
| + EXTN(jsimd_ycc_rgb_convert_mmx):
|
| + push ebp
|
| +@@ -402,3 +400,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jdclrss2-64.asm
|
| ===================================================================
|
| --- simd/jdclrss2-64.asm (revision 829)
|
| +++ simd/jdclrss2-64.asm (working copy)
|
| -@@ -39,7 +39,7 @@
|
| +@@ -1,8 +1,8 @@
|
| + ;
|
| +-; jdclrss2.asm - colorspace conversion (64-bit SSE2)
|
| ++; jdclrss2-64.asm - colorspace conversion (64-bit SSE2)
|
| + ;
|
| +-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +-; Copyright 2009 D. R. Commander
|
| ++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2009, 2012 D. R. Commander
|
| + ;
|
| + ; Based on
|
| + ; x86 SIMD extension for IJG JPEG library
|
| +@@ -20,8 +20,6 @@
|
| + %include "jcolsamp.inc"
|
| +
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 64
|
| + ;
|
| + ; Convert some rows of samples to the output colorspace.
|
| + ;
|
| +@@ -41,7 +39,7 @@
|
| %define WK_NUM 2
|
|
|
| align 16
|
| @@ -862,76 +11861,776 @@ Index: simd/jdclrss2-64.asm
|
|
|
| EXTN(jsimd_ycc_rgb_convert_sse2):
|
| push rbp
|
| -Index: simd/jdcolmmx.asm
|
| -===================================================================
|
| ---- simd/jdcolmmx.asm (revision 829)
|
| -+++ simd/jdcolmmx.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - SECTION SEG_CONST
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_ycc_rgb_convert_mmx)
|
| -+ global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE
|
| -
|
| - EXTN(jconst_ycc_rgb_convert_mmx):
|
| -
|
| -Index: simd/jcclrmmx.asm
|
| +@@ -51,8 +49,8 @@
|
| + mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [wk(0)]
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + mov rcx, r10 ; num_cols
|
| + test rcx,rcx
|
| +@@ -72,7 +70,7 @@
|
| + pop rcx
|
| +
|
| + mov rdi, r13
|
| +- mov rax, r14
|
| ++ mov eax, r14d
|
| + test rax,rax
|
| + jle near .return
|
| + .rowloop:
|
| +@@ -253,17 +251,13 @@
|
| + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
|
| +- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
|
| + .out0:
|
| ++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub rcx, byte SIZEOF_XMMWORD
|
| + jz near .nextrow
|
| +
|
| +@@ -273,14 +267,12 @@
|
| + jmp near .columnloop
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
|
| + cmp rcx, byte 2*SIZEOF_XMMWORD
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmF
|
| + sub rcx, byte 2*SIZEOF_XMMWORD
|
| + jmp short .column_st15
|
| +@@ -287,50 +279,44 @@
|
| + .column_st16:
|
| + cmp rcx, byte SIZEOF_XMMWORD
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| + add rdi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub rcx, byte SIZEOF_XMMWORD
|
| + .column_st15:
|
| +- mov rax,rcx
|
| +- xor rcx, byte 0x0F
|
| +- shl rcx, 2
|
| +- movd xmmB,ecx
|
| +- psrlq xmmH,4
|
| +- pcmpeqb xmmE,xmmE
|
| +- psrlq xmmH,xmmB
|
| +- psrlq xmmE,xmmB
|
| +- punpcklbw xmmE,xmmH
|
| +- ; ----------------
|
| +- mov rcx,rdi
|
| +- and rcx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- add rax,rcx
|
| +- cmp rax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,rcx
|
| +- movdqa xmmG,xmmA
|
| +- movdqa xmmC,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmD,ecx
|
| +- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmF,ecx
|
| +- psllq xmmA,xmmF
|
| +- psllq xmmE,xmmF
|
| +- jmp short .adj0
|
| +-.adj1: neg ecx
|
| +- movd xmmF,ecx
|
| +- psrlq xmmA,xmmF
|
| +- psrlq xmmE,xmmF
|
| +- psllq xmmG,xmmD
|
| +- psllq xmmC,xmmD
|
| +- por xmmA,xmmG
|
| +- por xmmE,xmmC
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| ++ ; Store the lower 8 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp rcx, byte SIZEOF_MMWORD
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [rdi], xmmA
|
| ++ add rdi, byte SIZEOF_MMWORD
|
| ++ sub rcx, byte SIZEOF_MMWORD
|
| ++ psrldq xmmA, SIZEOF_MMWORD
|
| ++.column_st7:
|
| ++ ; Store the lower 4 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp rcx, byte SIZEOF_DWORD
|
| ++ jb short .column_st3
|
| ++ movd XMM_DWORD [rdi], xmmA
|
| ++ add rdi, byte SIZEOF_DWORD
|
| ++ sub rcx, byte SIZEOF_DWORD
|
| ++ psrldq xmmA, SIZEOF_DWORD
|
| ++.column_st3:
|
| ++ ; Store the lower 2 bytes of rax to the output when it has enough
|
| ++ ; space.
|
| ++ movd eax, xmmA
|
| ++ cmp rcx, byte SIZEOF_WORD
|
| ++ jb short .column_st1
|
| ++ mov WORD [rdi], ax
|
| ++ add rdi, byte SIZEOF_WORD
|
| ++ sub rcx, byte SIZEOF_WORD
|
| ++ shr rax, 16
|
| ++.column_st1:
|
| ++ ; Store the lower 1 byte of rax to the output when it has enough
|
| ++ ; space.
|
| ++ test rcx, rcx
|
| ++ jz short .nextrow
|
| ++ mov BYTE [rdi], al
|
| +
|
| + %else ; RGB_PIXELSIZE == 4 ; -----------
|
| +
|
| +@@ -375,19 +361,14 @@
|
| + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
|
| + movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
|
| +- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
|
| ++ movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
|
| + .out0:
|
| ++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub rcx, byte SIZEOF_XMMWORD
|
| + jz near .nextrow
|
| +
|
| +@@ -397,13 +378,11 @@
|
| + jmp near .columnloop
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| + cmp rcx, byte SIZEOF_XMMWORD/2
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmC
|
| + movdqa xmmD,xmmH
|
| + sub rcx, byte SIZEOF_XMMWORD/2
|
| +@@ -410,50 +389,25 @@
|
| + .column_st16:
|
| + cmp rcx, byte SIZEOF_XMMWORD/4
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| + add rdi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub rcx, byte SIZEOF_XMMWORD/4
|
| + .column_st15:
|
| +- cmp rcx, byte SIZEOF_XMMWORD/16
|
| +- jb near .nextrow
|
| +- mov rax,rcx
|
| +- xor rcx, byte 0x03
|
| +- inc rcx
|
| +- shl rcx, 4
|
| +- movd xmmF,ecx
|
| +- psrlq xmmE,xmmF
|
| +- punpcklbw xmmE,xmmE
|
| +- ; ----------------
|
| +- mov rcx,rdi
|
| +- and rcx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- lea rax, [rcx+rax*4] ; RGB_PIXELSIZE
|
| +- cmp rax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmB,xmmA
|
| +- movdqa xmmG,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmC,ecx
|
| +- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmH,ecx
|
| +- psllq xmmA,xmmH
|
| +- psllq xmmE,xmmH
|
| +- jmp short .adj0
|
| +-.adj1: neg rcx
|
| +- movd xmmH,ecx
|
| +- psrlq xmmA,xmmH
|
| +- psrlq xmmE,xmmH
|
| +- psllq xmmB,xmmC
|
| +- psllq xmmG,xmmC
|
| +- por xmmA,xmmB
|
| +- por xmmE,xmmG
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| ++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp rcx, byte SIZEOF_XMMWORD/8
|
| ++ jb short .column_st7
|
| ++ movq MMWORD [rdi], xmmA
|
| ++ add rdi, byte SIZEOF_XMMWORD/8*4
|
| ++ sub rcx, byte SIZEOF_XMMWORD/8
|
| ++ psrldq xmmA, SIZEOF_XMMWORD/8*4
|
| ++.column_st7:
|
| ++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ test rcx, rcx
|
| ++ jz short .nextrow
|
| ++ movd XMM_DWORD [rdi], xmmA
|
| +
|
| + %endif ; RGB_PIXELSIZE ; ---------------
|
| +
|
| +@@ -475,9 +429,13 @@
|
| + sfence ; flush the write buffer
|
| +
|
| + .return:
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + mov rsp,rbp ; rsp <- aligned rbp
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdclrss2.asm
|
| ===================================================================
|
| ---- simd/jcclrmmx.asm (revision 829)
|
| -+++ simd/jcclrmmx.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| +--- simd/jdclrss2.asm (revision 829)
|
| ++++ simd/jdclrss2.asm (working copy)
|
| +@@ -1,7 +1,8 @@
|
| + ;
|
| + ; jdclrss2.asm - colorspace conversion (SSE2)
|
| + ;
|
| +-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2012 D. R. Commander
|
| + ;
|
| + ; Based on
|
| + ; x86 SIMD extension for IJG JPEG library
|
| +@@ -19,8 +20,6 @@
|
| + %include "jcolsamp.inc"
|
| +
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 32
|
| + ;
|
| + ; Convert some rows of samples to the output colorspace.
|
| + ;
|
| +@@ -42,7 +41,7 @@
|
| %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
|
|
| align 16
|
| -- global EXTN(jsimd_rgb_ycc_convert_mmx)
|
| -+ global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE
|
| +- global EXTN(jsimd_ycc_rgb_convert_sse2)
|
| ++ global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_rgb_ycc_convert_mmx):
|
| + EXTN(jsimd_ycc_rgb_convert_sse2):
|
| push ebp
|
| -Index: simd/jfsseflt.asm
|
| +@@ -264,17 +263,13 @@
|
| + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
|
| +- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
|
| + .out0:
|
| ++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub ecx, byte SIZEOF_XMMWORD
|
| + jz near .nextrow
|
| +
|
| +@@ -285,14 +280,12 @@
|
| + alignx 16,7
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
| + cmp ecx, byte 2*SIZEOF_XMMWORD
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmF
|
| + sub ecx, byte 2*SIZEOF_XMMWORD
|
| + jmp short .column_st15
|
| +@@ -299,50 +292,44 @@
|
| + .column_st16:
|
| + cmp ecx, byte SIZEOF_XMMWORD
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| + add edi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub ecx, byte SIZEOF_XMMWORD
|
| + .column_st15:
|
| +- mov eax,ecx
|
| +- xor ecx, byte 0x0F
|
| +- shl ecx, 2
|
| +- movd xmmB,ecx
|
| +- psrlq xmmH,4
|
| +- pcmpeqb xmmE,xmmE
|
| +- psrlq xmmH,xmmB
|
| +- psrlq xmmE,xmmB
|
| +- punpcklbw xmmE,xmmH
|
| +- ; ----------------
|
| +- mov ecx,edi
|
| +- and ecx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- add eax,ecx
|
| +- cmp eax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmG,xmmA
|
| +- movdqa xmmC,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmD,ecx
|
| +- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmF,ecx
|
| +- psllq xmmA,xmmF
|
| +- psllq xmmE,xmmF
|
| +- jmp short .adj0
|
| +-.adj1: neg ecx
|
| +- movd xmmF,ecx
|
| +- psrlq xmmA,xmmF
|
| +- psrlq xmmE,xmmF
|
| +- psllq xmmG,xmmD
|
| +- psllq xmmC,xmmD
|
| +- por xmmA,xmmG
|
| +- por xmmE,xmmC
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ ; Store the lower 8 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp ecx, byte SIZEOF_MMWORD
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [edi], xmmA
|
| ++ add edi, byte SIZEOF_MMWORD
|
| ++ sub ecx, byte SIZEOF_MMWORD
|
| ++ psrldq xmmA, SIZEOF_MMWORD
|
| ++.column_st7:
|
| ++ ; Store the lower 4 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp ecx, byte SIZEOF_DWORD
|
| ++ jb short .column_st3
|
| ++ movd XMM_DWORD [edi], xmmA
|
| ++ add edi, byte SIZEOF_DWORD
|
| ++ sub ecx, byte SIZEOF_DWORD
|
| ++ psrldq xmmA, SIZEOF_DWORD
|
| ++.column_st3:
|
| ++ ; Store the lower 2 bytes of eax to the output when it has enough
|
| ++ ; space.
|
| ++ movd eax, xmmA
|
| ++ cmp ecx, byte SIZEOF_WORD
|
| ++ jb short .column_st1
|
| ++ mov WORD [edi], ax
|
| ++ add edi, byte SIZEOF_WORD
|
| ++ sub ecx, byte SIZEOF_WORD
|
| ++ shr eax, 16
|
| ++.column_st1:
|
| ++ ; Store the lower 1 byte of eax to the output when it has enough
|
| ++ ; space.
|
| ++ test ecx, ecx
|
| ++ jz short .nextrow
|
| ++ mov BYTE [edi], al
|
| +
|
| + %else ; RGB_PIXELSIZE == 4 ; -----------
|
| +
|
| +@@ -387,19 +374,14 @@
|
| + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
|
| + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
|
| +- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
|
| ++ movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
|
| + .out0:
|
| ++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub ecx, byte SIZEOF_XMMWORD
|
| + jz near .nextrow
|
| +
|
| +@@ -410,13 +392,11 @@
|
| + alignx 16,7
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| + cmp ecx, byte SIZEOF_XMMWORD/2
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmC
|
| + movdqa xmmD,xmmH
|
| + sub ecx, byte SIZEOF_XMMWORD/2
|
| +@@ -423,50 +403,25 @@
|
| + .column_st16:
|
| + cmp ecx, byte SIZEOF_XMMWORD/4
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| + add edi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub ecx, byte SIZEOF_XMMWORD/4
|
| + .column_st15:
|
| +- cmp ecx, byte SIZEOF_XMMWORD/16
|
| +- jb short .nextrow
|
| +- mov eax,ecx
|
| +- xor ecx, byte 0x03
|
| +- inc ecx
|
| +- shl ecx, 4
|
| +- movd xmmF,ecx
|
| +- psrlq xmmE,xmmF
|
| +- punpcklbw xmmE,xmmE
|
| +- ; ----------------
|
| +- mov ecx,edi
|
| +- and ecx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
|
| +- cmp eax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmB,xmmA
|
| +- movdqa xmmG,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmC,ecx
|
| +- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmH,ecx
|
| +- psllq xmmA,xmmH
|
| +- psllq xmmE,xmmH
|
| +- jmp short .adj0
|
| +-.adj1: neg ecx
|
| +- movd xmmH,ecx
|
| +- psrlq xmmA,xmmH
|
| +- psrlq xmmE,xmmH
|
| +- psllq xmmB,xmmC
|
| +- psllq xmmG,xmmC
|
| +- por xmmA,xmmB
|
| +- por xmmE,xmmG
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp ecx, byte SIZEOF_XMMWORD/8
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [edi], xmmA
|
| ++ add edi, byte SIZEOF_XMMWORD/8*4
|
| ++ sub ecx, byte SIZEOF_XMMWORD/8
|
| ++ psrldq xmmA, SIZEOF_XMMWORD/8*4
|
| ++.column_st7:
|
| ++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ test ecx, ecx
|
| ++ jz short .nextrow
|
| ++ movd XMM_DWORD [edi], xmmA
|
| +
|
| + %endif ; RGB_PIXELSIZE ; ---------------
|
| +
|
| +@@ -500,3 +455,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdcolmmx.asm
|
| ===================================================================
|
| ---- simd/jfsseflt.asm (revision 829)
|
| -+++ simd/jfsseflt.asm (working copy)
|
| -@@ -37,7 +37,7 @@
|
| +--- simd/jdcolmmx.asm (revision 829)
|
| ++++ simd/jdcolmmx.asm (working copy)
|
| +@@ -35,7 +35,7 @@
|
| SECTION SEG_CONST
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_fdct_float_sse)
|
| -+ global EXTN(jconst_fdct_float_sse) PRIVATE
|
| -
|
| - EXTN(jconst_fdct_float_sse):
|
| + alignz 16
|
| +- global EXTN(jconst_ycc_rgb_convert_mmx)
|
| ++ global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE
|
|
|
| -@@ -65,7 +65,7 @@
|
| - %define WK_NUM 2
|
| + EXTN(jconst_ycc_rgb_convert_mmx):
|
|
|
| - align 16
|
| -- global EXTN(jsimd_fdct_float_sse)
|
| -+ global EXTN(jsimd_fdct_float_sse) PRIVATE
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
|
|
| - EXTN(jsimd_fdct_float_sse):
|
| - push ebp
|
| -Index: simd/jdmrgss2-64.asm
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 32
|
| ++
|
| + %include "jdclrmmx.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx
|
| + %include "jdclrmmx.asm"
|
| +
|
| +@@ -65,10 +68,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx
|
| + %include "jdclrmmx.asm"
|
| +
|
| +@@ -76,10 +79,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx
|
| + %include "jdclrmmx.asm"
|
| +
|
| +@@ -87,10 +90,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx
|
| + %include "jdclrmmx.asm"
|
| +
|
| +@@ -98,10 +101,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx
|
| + %include "jdclrmmx.asm"
|
| +
|
| +@@ -109,9 +112,9 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx
|
| + %include "jdclrmmx.asm"
|
| +Index: simd/jdcolss2-64.asm
|
| ===================================================================
|
| ---- simd/jdmrgss2-64.asm (revision 829)
|
| -+++ simd/jdmrgss2-64.asm (working copy)
|
| -@@ -39,7 +39,7 @@
|
| - %define WK_NUM 3
|
| +--- simd/jdcolss2-64.asm (revision 829)
|
| ++++ simd/jdcolss2-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jdcolss2.asm - colorspace conversion (64-bit SSE2)
|
| ++; jdcolss2-64.asm - colorspace conversion (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -35,7 +35,7 @@
|
| + SECTION SEG_CONST
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v1_merged_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE
|
| + alignz 16
|
| +- global EXTN(jconst_ycc_rgb_convert_sse2)
|
| ++ global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_merged_upsample_sse2):
|
| - push rbp
|
| -@@ -543,7 +543,7 @@
|
| - ; r13 = JSAMPARRAY output_buf
|
| + EXTN(jconst_ycc_rgb_convert_sse2):
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v2_merged_upsample_sse2)
|
| -+ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
|
|
| - EXTN(jsimd_h2v2_merged_upsample_sse2):
|
| - push rbp
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 64
|
| ++
|
| + %include "jdclrss2-64.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
|
| + %include "jdclrss2-64.asm"
|
| +
|
| +@@ -65,10 +68,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
|
| + %include "jdclrss2-64.asm"
|
| +
|
| +@@ -76,10 +79,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
|
| + %include "jdclrss2-64.asm"
|
| +
|
| +@@ -87,10 +90,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
|
| + %include "jdclrss2-64.asm"
|
| +
|
| +@@ -98,10 +101,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
|
| + %include "jdclrss2-64.asm"
|
| +
|
| +@@ -109,9 +112,9 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
|
| + %include "jdclrss2-64.asm"
|
| Index: simd/jdcolss2.asm
|
| ===================================================================
|
| --- simd/jdcolss2.asm (revision 829)
|
| @@ -945,6 +12644,105 @@ Index: simd/jdcolss2.asm
|
|
|
| EXTN(jconst_ycc_rgb_convert_sse2):
|
|
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
| +
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 32
|
| ++
|
| + %include "jdclrss2.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
|
| + %include "jdclrss2.asm"
|
| +
|
| +@@ -65,10 +68,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
|
| + %include "jdclrss2.asm"
|
| +
|
| +@@ -76,10 +79,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
|
| + %include "jdclrss2.asm"
|
| +
|
| +@@ -87,10 +90,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
|
| + %include "jdclrss2.asm"
|
| +
|
| +@@ -98,10 +101,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
|
| + %include "jdclrss2.asm"
|
| +
|
| +@@ -109,9 +112,9 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
|
| + %include "jdclrss2.asm"
|
| Index: simd/jdmermmx.asm
|
| ===================================================================
|
| --- simd/jdmermmx.asm (revision 829)
|
| @@ -958,50 +12756,226 @@ Index: simd/jdmermmx.asm
|
|
|
| EXTN(jconst_merged_upsample_mmx):
|
|
|
| -Index: simd/jcclrss2.asm
|
| -===================================================================
|
| ---- simd/jcclrss2.asm (revision 829)
|
| -+++ simd/jcclrss2.asm (working copy)
|
| -@@ -38,7 +38,7 @@
|
| -
|
| - align 16
|
| -
|
| -- global EXTN(jsimd_rgb_ycc_convert_sse2)
|
| -+ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
|
|
| - EXTN(jsimd_rgb_ycc_convert_sse2):
|
| - push ebp
|
| -Index: simd/jiss2red.asm
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 32
|
| ++
|
| + %include "jdmrgmmx.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx
|
| + %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx
|
| + %include "jdmrgmmx.asm"
|
| +@@ -66,10 +69,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx
|
| + %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx
|
| + %include "jdmrgmmx.asm"
|
| +@@ -78,10 +81,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx
|
| + %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx
|
| + %include "jdmrgmmx.asm"
|
| +@@ -90,10 +93,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx
|
| + %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx
|
| + %include "jdmrgmmx.asm"
|
| +@@ -102,10 +105,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx
|
| + %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx
|
| + %include "jdmrgmmx.asm"
|
| +@@ -114,10 +117,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx
|
| + %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx
|
| + %include "jdmrgmmx.asm"
|
| +Index: simd/jdmerss2-64.asm
|
| ===================================================================
|
| ---- simd/jiss2red.asm (revision 829)
|
| -+++ simd/jiss2red.asm (working copy)
|
| -@@ -72,7 +72,7 @@
|
| +--- simd/jdmerss2-64.asm (revision 829)
|
| ++++ simd/jdmerss2-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jdmerss2.asm - merged upsampling/color conversion (64-bit SSE2)
|
| ++; jdmerss2-64.asm - merged upsampling/color conversion (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -35,7 +35,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_idct_red_sse2)
|
| -+ global EXTN(jconst_idct_red_sse2) PRIVATE
|
| -
|
| - EXTN(jconst_idct_red_sse2):
|
| -
|
| -@@ -113,7 +113,7 @@
|
| - %define WK_NUM 2
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_idct_4x4_sse2)
|
| -+ global EXTN(jsimd_idct_4x4_sse2) PRIVATE
|
| +- global EXTN(jconst_merged_upsample_sse2)
|
| ++ global EXTN(jconst_merged_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_idct_4x4_sse2):
|
| - push ebp
|
| -@@ -424,7 +424,7 @@
|
| - %define output_col(b) (b)+20 ; JDIMENSION output_col
|
| + EXTN(jconst_merged_upsample_sse2):
|
|
|
| - align 16
|
| -- global EXTN(jsimd_idct_2x2_sse2)
|
| -+ global EXTN(jsimd_idct_2x2_sse2) PRIVATE
|
| +@@ -48,6 +48,9 @@
|
| + alignz 16
|
|
|
| - EXTN(jsimd_idct_2x2_sse2):
|
| - push ebp
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 64
|
| ++
|
| + %include "jdmrgss2-64.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2
|
| + %include "jdmrgss2-64.asm"
|
| +@@ -66,10 +69,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2
|
| + %include "jdmrgss2-64.asm"
|
| +@@ -78,10 +81,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2
|
| + %include "jdmrgss2-64.asm"
|
| +@@ -90,10 +93,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2
|
| + %include "jdmrgss2-64.asm"
|
| +@@ -102,10 +105,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2
|
| + %include "jdmrgss2-64.asm"
|
| +@@ -114,10 +117,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2
|
| + %include "jdmrgss2-64.asm"
|
| Index: simd/jdmerss2.asm
|
| ===================================================================
|
| --- simd/jdmerss2.asm (revision 829)
|
| @@ -1015,146 +12989,854 @@ Index: simd/jdmerss2.asm
|
|
|
| EXTN(jconst_merged_upsample_sse2):
|
|
|
| -Index: simd/jfss2fst-64.asm
|
| -===================================================================
|
| ---- simd/jfss2fst-64.asm (revision 829)
|
| -+++ simd/jfss2fst-64.asm (working copy)
|
| -@@ -53,7 +53,7 @@
|
| - %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| -
|
| +@@ -48,6 +48,9 @@
|
| alignz 16
|
| -- global EXTN(jconst_fdct_ifast_sse2)
|
| -+ global EXTN(jconst_fdct_ifast_sse2) PRIVATE
|
|
|
| - EXTN(jconst_fdct_ifast_sse2):
|
| + ; --------------------------------------------------------------------------
|
| ++ SECTION SEG_TEXT
|
| ++ BITS 32
|
| ++
|
| + %include "jdmrgss2.asm"
|
| +
|
| + %undef RGB_RED
|
| +@@ -54,10 +57,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_RGB_RED
|
| ++%define RGB_GREEN EXT_RGB_GREEN
|
| ++%define RGB_BLUE EXT_RGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2
|
| + %include "jdmrgss2.asm"
|
| +@@ -66,10 +69,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 0
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 2
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_RGBX_RED
|
| ++%define RGB_GREEN EXT_RGBX_GREEN
|
| ++%define RGB_BLUE EXT_RGBX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2
|
| + %include "jdmrgss2.asm"
|
| +@@ -78,10 +81,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 3
|
| ++%define RGB_RED EXT_BGR_RED
|
| ++%define RGB_GREEN EXT_BGR_GREEN
|
| ++%define RGB_BLUE EXT_BGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2
|
| + %include "jdmrgss2.asm"
|
| +@@ -90,10 +93,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 2
|
| +-%define RGB_GREEN 1
|
| +-%define RGB_BLUE 0
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_BGRX_RED
|
| ++%define RGB_GREEN EXT_BGRX_GREEN
|
| ++%define RGB_BLUE EXT_BGRX_BLUE
|
| ++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2
|
| + %include "jdmrgss2.asm"
|
| +@@ -102,10 +105,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 3
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 1
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XBGR_RED
|
| ++%define RGB_GREEN EXT_XBGR_GREEN
|
| ++%define RGB_BLUE EXT_XBGR_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2
|
| + %include "jdmrgss2.asm"
|
| +@@ -114,10 +117,10 @@
|
| + %undef RGB_GREEN
|
| + %undef RGB_BLUE
|
| + %undef RGB_PIXELSIZE
|
| +-%define RGB_RED 1
|
| +-%define RGB_GREEN 2
|
| +-%define RGB_BLUE 3
|
| +-%define RGB_PIXELSIZE 4
|
| ++%define RGB_RED EXT_XRGB_RED
|
| ++%define RGB_GREEN EXT_XRGB_GREEN
|
| ++%define RGB_BLUE EXT_XRGB_BLUE
|
| ++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| + %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2
|
| + %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2
|
| + %include "jdmrgss2.asm"
|
| +Index: simd/jdmrgmmx.asm
|
| +===================================================================
|
| +--- simd/jdmrgmmx.asm (revision 829)
|
| ++++ simd/jdmrgmmx.asm (working copy)
|
| +@@ -19,8 +19,6 @@
|
| + %include "jcolsamp.inc"
|
|
|
| -@@ -80,7 +80,7 @@
|
| - %define WK_NUM 2
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 32
|
| + ;
|
| + ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
|
| + ;
|
| +@@ -42,7 +40,7 @@
|
| + %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_ifast_sse2)
|
| -+ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE
|
| +- global EXTN(jsimd_h2v1_merged_upsample_mmx)
|
| ++ global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_fdct_ifast_sse2):
|
| - push rbp
|
| -Index: simd/jcqntmmx.asm
|
| -===================================================================
|
| ---- simd/jcqntmmx.asm (revision 829)
|
| -+++ simd/jcqntmmx.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - %define workspace ebp+16 ; DCTELEM * workspace
|
| + EXTN(jsimd_h2v1_merged_upsample_mmx):
|
| + push ebp
|
| +@@ -253,7 +251,7 @@
|
| + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
|
|
|
| - align 16
|
| -- global EXTN(jsimd_convsamp_mmx)
|
| -+ global EXTN(jsimd_convsamp_mmx) PRIVATE
|
| + sub ecx, byte SIZEOF_MMWORD
|
| +- jz short .endcolumn
|
| ++ jz near .endcolumn
|
|
|
| - EXTN(jsimd_convsamp_mmx):
|
| - push ebp
|
| -@@ -140,7 +140,7 @@
|
| - %define workspace ebp+16 ; DCTELEM * workspace
|
| + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
| + add esi, byte SIZEOF_MMWORD ; inptr0
|
| +@@ -411,7 +409,7 @@
|
| + %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
|
|
| align 16
|
| -- global EXTN(jsimd_quantize_mmx)
|
| -+ global EXTN(jsimd_quantize_mmx) PRIVATE
|
| +- global EXTN(jsimd_h2v2_merged_upsample_mmx)
|
| ++ global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_quantize_mmx):
|
| + EXTN(jsimd_h2v2_merged_upsample_mmx):
|
| push ebp
|
| -Index: simd/jimmxfst.asm
|
| -===================================================================
|
| ---- simd/jimmxfst.asm (revision 829)
|
| -+++ simd/jimmxfst.asm (working copy)
|
| -@@ -59,7 +59,7 @@
|
| - %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| +@@ -461,3 +459,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_idct_ifast_mmx)
|
| -+ global EXTN(jconst_idct_ifast_mmx) PRIVATE
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdmrgss2-64.asm
|
| +===================================================================
|
| +--- simd/jdmrgss2-64.asm (revision 829)
|
| ++++ simd/jdmrgss2-64.asm (working copy)
|
| +@@ -1,8 +1,8 @@
|
| + ;
|
| +-; jdmrgss2.asm - merged upsampling/color conversion (64-bit SSE2)
|
| ++; jdmrgss2-64.asm - merged upsampling/color conversion (64-bit SSE2)
|
| + ;
|
| +-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +-; Copyright 2009 D. R. Commander
|
| ++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2009, 2012 D. R. Commander
|
| + ;
|
| + ; Based on
|
| + ; x86 SIMD extension for IJG JPEG library
|
| +@@ -20,8 +20,6 @@
|
| + %include "jcolsamp.inc"
|
| +
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 64
|
| + ;
|
| + ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
|
| + ;
|
| +@@ -41,7 +39,7 @@
|
| + %define WK_NUM 3
|
|
|
| - EXTN(jconst_idct_ifast_mmx):
|
| + align 16
|
| +- global EXTN(jsimd_h2v1_merged_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE
|
|
|
| -@@ -94,7 +94,7 @@
|
| - ; JCOEF workspace[DCTSIZE2]
|
| + EXTN(jsimd_h2v1_merged_upsample_sse2):
|
| + push rbp
|
| +@@ -51,8 +49,8 @@
|
| + mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [wk(0)]
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + mov rcx, r10 ; col
|
| + test rcx,rcx
|
| +@@ -254,17 +252,13 @@
|
| + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
|
| +- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
|
| + .out0:
|
| ++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub rcx, byte SIZEOF_XMMWORD
|
| + jz near .endcolumn
|
| +
|
| +@@ -277,14 +271,12 @@
|
| + jmp near .columnloop
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
|
| + cmp rcx, byte 2*SIZEOF_XMMWORD
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmF
|
| + sub rcx, byte 2*SIZEOF_XMMWORD
|
| + jmp short .column_st15
|
| +@@ -291,50 +283,44 @@
|
| + .column_st16:
|
| + cmp rcx, byte SIZEOF_XMMWORD
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| + add rdi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub rcx, byte SIZEOF_XMMWORD
|
| + .column_st15:
|
| +- mov rax,rcx
|
| +- xor rcx, byte 0x0F
|
| +- shl rcx, 2
|
| +- movd xmmB,ecx
|
| +- psrlq xmmH,4
|
| +- pcmpeqb xmmE,xmmE
|
| +- psrlq xmmH,xmmB
|
| +- psrlq xmmE,xmmB
|
| +- punpcklbw xmmE,xmmH
|
| +- ; ----------------
|
| +- mov rcx,rdi
|
| +- and rcx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- add rax,rcx
|
| +- cmp rax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmG,xmmA
|
| +- movdqa xmmC,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmD,ecx
|
| +- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmF,ecx
|
| +- psllq xmmA,xmmF
|
| +- psllq xmmE,xmmF
|
| +- jmp short .adj0
|
| +-.adj1: neg rcx
|
| +- movd xmmF,ecx
|
| +- psrlq xmmA,xmmF
|
| +- psrlq xmmE,xmmF
|
| +- psllq xmmG,xmmD
|
| +- psllq xmmC,xmmD
|
| +- por xmmA,xmmG
|
| +- por xmmE,xmmC
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ ; Store the lower 8 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp rcx, byte SIZEOF_MMWORD
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [rdi], xmmA
|
| ++ add rdi, byte SIZEOF_MMWORD
|
| ++ sub rcx, byte SIZEOF_MMWORD
|
| ++ psrldq xmmA, SIZEOF_MMWORD
|
| ++.column_st7:
|
| ++ ; Store the lower 4 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp rcx, byte SIZEOF_DWORD
|
| ++ jb short .column_st3
|
| ++ movd XMM_DWORD [rdi], xmmA
|
| ++ add rdi, byte SIZEOF_DWORD
|
| ++ sub rcx, byte SIZEOF_DWORD
|
| ++ psrldq xmmA, SIZEOF_DWORD
|
| ++.column_st3:
|
| ++ ; Store the lower 2 bytes of rax to the output when it has enough
|
| ++ ; space.
|
| ++ movd eax, xmmA
|
| ++ cmp rcx, byte SIZEOF_WORD
|
| ++ jb short .column_st1
|
| ++ mov WORD [rdi], ax
|
| ++ add rdi, byte SIZEOF_WORD
|
| ++ sub rcx, byte SIZEOF_WORD
|
| ++ shr rax, 16
|
| ++.column_st1:
|
| ++ ; Store the lower 1 byte of rax to the output when it has enough
|
| ++ ; space.
|
| ++ test rcx, rcx
|
| ++ jz short .endcolumn
|
| ++ mov BYTE [rdi], al
|
| +
|
| + %else ; RGB_PIXELSIZE == 4 ; -----------
|
| +
|
| +@@ -379,19 +365,14 @@
|
| + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
|
| + movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
|
| +- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
|
| ++ movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
|
| + .out0:
|
| ++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub rcx, byte SIZEOF_XMMWORD
|
| + jz near .endcolumn
|
| +
|
| +@@ -404,13 +385,11 @@
|
| + jmp near .columnloop
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| + cmp rcx, byte SIZEOF_XMMWORD/2
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
|
| +- add rdi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmC
|
| + movdqa xmmD,xmmH
|
| + sub rcx, byte SIZEOF_XMMWORD/2
|
| +@@ -417,50 +396,25 @@
|
| + .column_st16:
|
| + cmp rcx, byte SIZEOF_XMMWORD/4
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
|
| + add rdi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub rcx, byte SIZEOF_XMMWORD/4
|
| + .column_st15:
|
| +- cmp rcx, byte SIZEOF_XMMWORD/16
|
| +- jb near .endcolumn
|
| +- mov rax,rcx
|
| +- xor rcx, byte 0x03
|
| +- inc rcx
|
| +- shl rcx, 4
|
| +- movd xmmF,ecx
|
| +- psrlq xmmE,xmmF
|
| +- punpcklbw xmmE,xmmE
|
| +- ; ----------------
|
| +- mov rcx,rdi
|
| +- and rcx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- lea rax, [rcx+rax*4] ; RGB_PIXELSIZE
|
| +- cmp rax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmB,xmmA
|
| +- movdqa xmmG,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmC,ecx
|
| +- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmH,ecx
|
| +- psllq xmmA,xmmH
|
| +- psllq xmmE,xmmH
|
| +- jmp short .adj0
|
| +-.adj1: neg rcx
|
| +- movd xmmH,ecx
|
| +- psrlq xmmA,xmmH
|
| +- psrlq xmmE,xmmH
|
| +- psllq xmmB,xmmC
|
| +- psllq xmmG,xmmC
|
| +- por xmmA,xmmB
|
| +- por xmmE,xmmG
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp rcx, byte SIZEOF_XMMWORD/8
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [rdi], xmmA
|
| ++ add rdi, byte SIZEOF_XMMWORD/8*4
|
| ++ sub rcx, byte SIZEOF_XMMWORD/8
|
| ++ psrldq xmmA, SIZEOF_XMMWORD/8*4
|
| ++.column_st7:
|
| ++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ test rcx, rcx
|
| ++ jz short .endcolumn
|
| ++ movd XMM_DWORD [rdi], xmmA
|
| +
|
| + %endif ; RGB_PIXELSIZE ; ---------------
|
| +
|
| +@@ -468,8 +422,8 @@
|
| + sfence ; flush the write buffer
|
| +
|
| + .return:
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + mov rsp,rbp ; rsp <- aligned rbp
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| +@@ -492,13 +446,14 @@
|
| + ; r13 = JSAMPARRAY output_buf
|
|
|
| align 16
|
| -- global EXTN(jsimd_idct_ifast_mmx)
|
| -+ global EXTN(jsimd_idct_ifast_mmx) PRIVATE
|
| +- global EXTN(jsimd_h2v2_merged_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_idct_ifast_mmx):
|
| - push ebp
|
| -Index: simd/jfss2fst.asm
|
| + EXTN(jsimd_h2v2_merged_upsample_sse2):
|
| + push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + mov rax, r10
|
| +
|
| +@@ -519,10 +474,17 @@
|
| + push rcx
|
| + push rax
|
| +
|
| ++ %ifdef WIN64
|
| ++ mov r8, rcx
|
| ++ mov r9, rdi
|
| ++ mov rcx, rax
|
| ++ mov rdx, rbx
|
| ++ %else
|
| + mov rdx, rcx
|
| + mov rcx, rdi
|
| + mov rdi, rax
|
| + mov rsi, rbx
|
| ++ %endif
|
| +
|
| + call EXTN(jsimd_h2v1_merged_upsample_sse2)
|
| +
|
| +@@ -545,10 +507,17 @@
|
| + push rcx
|
| + push rax
|
| +
|
| ++ %ifdef WIN64
|
| ++ mov r8, rcx
|
| ++ mov r9, rdi
|
| ++ mov rcx, rax
|
| ++ mov rdx, rbx
|
| ++ %else
|
| + mov rdx, rcx
|
| + mov rcx, rdi
|
| + mov rdi, rax
|
| + mov rsi, rbx
|
| ++ %endif
|
| +
|
| + call EXTN(jsimd_h2v1_merged_upsample_sse2)
|
| +
|
| +@@ -559,7 +528,11 @@
|
| + pop rbx
|
| + pop rdx
|
| +
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdmrgss2.asm
|
| ===================================================================
|
| ---- simd/jfss2fst.asm (revision 829)
|
| -+++ simd/jfss2fst.asm (working copy)
|
| -@@ -52,7 +52,7 @@
|
| - %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| -
|
| - alignz 16
|
| -- global EXTN(jconst_fdct_ifast_sse2)
|
| -+ global EXTN(jconst_fdct_ifast_sse2) PRIVATE
|
| +--- simd/jdmrgss2.asm (revision 829)
|
| ++++ simd/jdmrgss2.asm (working copy)
|
| +@@ -1,7 +1,8 @@
|
| + ;
|
| + ; jdmrgss2.asm - merged upsampling/color conversion (SSE2)
|
| + ;
|
| +-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2012 D. R. Commander
|
| + ;
|
| + ; Based on
|
| + ; x86 SIMD extension for IJG JPEG library
|
| +@@ -19,8 +20,6 @@
|
| + %include "jcolsamp.inc"
|
| +
|
| + ; --------------------------------------------------------------------------
|
| +- SECTION SEG_TEXT
|
| +- BITS 32
|
| + ;
|
| + ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
|
| + ;
|
| +@@ -42,7 +41,7 @@
|
| + %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
|
|
| - EXTN(jconst_fdct_ifast_sse2):
|
| + align 16
|
| +- global EXTN(jsimd_h2v1_merged_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE
|
|
|
| -@@ -80,7 +80,7 @@
|
| - %define WK_NUM 2
|
| + EXTN(jsimd_h2v1_merged_upsample_sse2):
|
| + push ebp
|
| +@@ -266,17 +265,13 @@
|
| + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
|
| +- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
|
| + .out0:
|
| ++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub ecx, byte SIZEOF_XMMWORD
|
| + jz near .endcolumn
|
| +
|
| +@@ -290,14 +285,12 @@
|
| + alignx 16,7
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
| + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
| + cmp ecx, byte 2*SIZEOF_XMMWORD
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmF
|
| + sub ecx, byte 2*SIZEOF_XMMWORD
|
| + jmp short .column_st15
|
| +@@ -304,50 +297,44 @@
|
| + .column_st16:
|
| + cmp ecx, byte SIZEOF_XMMWORD
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| + add edi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub ecx, byte SIZEOF_XMMWORD
|
| + .column_st15:
|
| +- mov eax,ecx
|
| +- xor ecx, byte 0x0F
|
| +- shl ecx, 2
|
| +- movd xmmB,ecx
|
| +- psrlq xmmH,4
|
| +- pcmpeqb xmmE,xmmE
|
| +- psrlq xmmH,xmmB
|
| +- psrlq xmmE,xmmB
|
| +- punpcklbw xmmE,xmmH
|
| +- ; ----------------
|
| +- mov ecx,edi
|
| +- and ecx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- add eax,ecx
|
| +- cmp eax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmG,xmmA
|
| +- movdqa xmmC,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmD,ecx
|
| +- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmF,ecx
|
| +- psllq xmmA,xmmF
|
| +- psllq xmmE,xmmF
|
| +- jmp short .adj0
|
| +-.adj1: neg ecx
|
| +- movd xmmF,ecx
|
| +- psrlq xmmA,xmmF
|
| +- psrlq xmmE,xmmF
|
| +- psllq xmmG,xmmD
|
| +- psllq xmmC,xmmD
|
| +- por xmmA,xmmG
|
| +- por xmmE,xmmC
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ ; Store the lower 8 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp ecx, byte SIZEOF_MMWORD
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [edi], xmmA
|
| ++ add edi, byte SIZEOF_MMWORD
|
| ++ sub ecx, byte SIZEOF_MMWORD
|
| ++ psrldq xmmA, SIZEOF_MMWORD
|
| ++.column_st7:
|
| ++ ; Store the lower 4 bytes of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp ecx, byte SIZEOF_DWORD
|
| ++ jb short .column_st3
|
| ++ movd XMM_DWORD [edi], xmmA
|
| ++ add edi, byte SIZEOF_DWORD
|
| ++ sub ecx, byte SIZEOF_DWORD
|
| ++ psrldq xmmA, SIZEOF_DWORD
|
| ++.column_st3:
|
| ++ ; Store the lower 2 bytes of eax to the output when it has enough
|
| ++ ; space.
|
| ++ movd eax, xmmA
|
| ++ cmp ecx, byte SIZEOF_WORD
|
| ++ jb short .column_st1
|
| ++ mov WORD [edi], ax
|
| ++ add edi, byte SIZEOF_WORD
|
| ++ sub ecx, byte SIZEOF_WORD
|
| ++ shr eax, 16
|
| ++.column_st1:
|
| ++ ; Store the lower 1 byte of eax to the output when it has enough
|
| ++ ; space.
|
| ++ test ecx, ecx
|
| ++ jz short .endcolumn
|
| ++ mov BYTE [edi], al
|
| +
|
| + %else ; RGB_PIXELSIZE == 4 ; -----------
|
| +
|
| +@@ -392,19 +379,14 @@
|
| + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
|
| + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
|
| +- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + jmp short .out0
|
| + .out1: ; --(unaligned)-----------------
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
|
| ++ movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
|
| + .out0:
|
| ++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
| + sub ecx, byte SIZEOF_XMMWORD
|
| + jz near .endcolumn
|
| +
|
| +@@ -418,13 +400,11 @@
|
| + alignx 16,7
|
| +
|
| + .column_st32:
|
| +- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
| + cmp ecx, byte SIZEOF_XMMWORD/2
|
| + jb short .column_st16
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| +- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
| +- add edi, byte SIZEOF_XMMWORD ; outptr
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| ++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
| ++ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmC
|
| + movdqa xmmD,xmmH
|
| + sub ecx, byte SIZEOF_XMMWORD/2
|
| +@@ -431,50 +411,25 @@
|
| + .column_st16:
|
| + cmp ecx, byte SIZEOF_XMMWORD/4
|
| + jb short .column_st15
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
| + add edi, byte SIZEOF_XMMWORD ; outptr
|
| + movdqa xmmA,xmmD
|
| + sub ecx, byte SIZEOF_XMMWORD/4
|
| + .column_st15:
|
| +- cmp ecx, byte SIZEOF_XMMWORD/16
|
| +- jb short .endcolumn
|
| +- mov eax,ecx
|
| +- xor ecx, byte 0x03
|
| +- inc ecx
|
| +- shl ecx, 4
|
| +- movd xmmF,ecx
|
| +- psrlq xmmE,xmmF
|
| +- punpcklbw xmmE,xmmE
|
| +- ; ----------------
|
| +- mov ecx,edi
|
| +- and ecx, byte SIZEOF_XMMWORD-1
|
| +- jz short .adj0
|
| +- lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
|
| +- cmp eax, byte SIZEOF_XMMWORD
|
| +- ja short .adj0
|
| +- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
| +- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
| +- movdqa xmmB,xmmA
|
| +- movdqa xmmG,xmmE
|
| +- pslldq xmmA, SIZEOF_XMMWORD/2
|
| +- pslldq xmmE, SIZEOF_XMMWORD/2
|
| +- movd xmmC,ecx
|
| +- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
| +- jb short .adj1
|
| +- movd xmmH,ecx
|
| +- psllq xmmA,xmmH
|
| +- psllq xmmE,xmmH
|
| +- jmp short .adj0
|
| +-.adj1: neg ecx
|
| +- movd xmmH,ecx
|
| +- psrlq xmmA,xmmH
|
| +- psrlq xmmE,xmmH
|
| +- psllq xmmB,xmmC
|
| +- psllq xmmG,xmmC
|
| +- por xmmA,xmmB
|
| +- por xmmE,xmmG
|
| +-.adj0: ; ----------------
|
| +- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
| ++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ cmp ecx, byte SIZEOF_XMMWORD/8
|
| ++ jb short .column_st7
|
| ++ movq XMM_MMWORD [edi], xmmA
|
| ++ add edi, byte SIZEOF_XMMWORD/8*4
|
| ++ sub ecx, byte SIZEOF_XMMWORD/8
|
| ++ psrldq xmmA, SIZEOF_XMMWORD/8*4
|
| ++.column_st7:
|
| ++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
|
| ++ ; space.
|
| ++ test ecx, ecx
|
| ++ jz short .endcolumn
|
| ++ movd XMM_DWORD [edi], xmmA
|
| +
|
| + %endif ; RGB_PIXELSIZE ; ---------------
|
| +
|
| +@@ -509,7 +464,7 @@
|
| + %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_ifast_sse2)
|
| -+ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE
|
| +- global EXTN(jsimd_h2v2_merged_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_fdct_ifast_sse2):
|
| + EXTN(jsimd_h2v2_merged_upsample_sse2):
|
| push ebp
|
| -Index: simd/jcgrammx.asm
|
| +@@ -559,3 +514,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdsammmx.asm
|
| ===================================================================
|
| ---- simd/jcgrammx.asm (revision 829)
|
| -+++ simd/jcgrammx.asm (working copy)
|
| -@@ -33,7 +33,7 @@
|
| +--- simd/jdsammmx.asm (revision 829)
|
| ++++ simd/jdsammmx.asm (working copy)
|
| +@@ -22,7 +22,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_rgb_gray_convert_mmx)
|
| -+ global EXTN(jconst_rgb_gray_convert_mmx) PRIVATE
|
| +- global EXTN(jconst_fancy_upsample_mmx)
|
| ++ global EXTN(jconst_fancy_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jconst_rgb_gray_convert_mmx):
|
| + EXTN(jconst_fancy_upsample_mmx):
|
|
|
| -Index: simd/jdcolss2-64.asm
|
| -===================================================================
|
| ---- simd/jdcolss2-64.asm (revision 829)
|
| -+++ simd/jdcolss2-64.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - SECTION SEG_CONST
|
| +@@ -58,7 +58,7 @@
|
| + %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_ycc_rgb_convert_sse2)
|
| -+ global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE
|
| + align 16
|
| +- global EXTN(jsimd_h2v1_fancy_upsample_mmx)
|
| ++ global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jconst_ycc_rgb_convert_sse2):
|
| + EXTN(jsimd_h2v1_fancy_upsample_mmx):
|
| + push ebp
|
| +@@ -216,7 +216,7 @@
|
| + %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
|
|
| -Index: simd/jf3dnflt.asm
|
| -===================================================================
|
| ---- simd/jf3dnflt.asm (revision 829)
|
| -+++ simd/jf3dnflt.asm (working copy)
|
| -@@ -27,7 +27,7 @@
|
| - SECTION SEG_CONST
|
| + align 16
|
| +- global EXTN(jsimd_h2v2_fancy_upsample_mmx)
|
| ++ global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE
|
|
|
| - alignz 16
|
| -- global EXTN(jconst_fdct_float_3dnow)
|
| -+ global EXTN(jconst_fdct_float_3dnow) PRIVATE
|
| + EXTN(jsimd_h2v2_fancy_upsample_mmx):
|
| + push ebp
|
| +@@ -542,7 +542,7 @@
|
| + %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
|
|
| - EXTN(jconst_fdct_float_3dnow):
|
| + align 16
|
| +- global EXTN(jsimd_h2v1_upsample_mmx)
|
| ++ global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE
|
|
|
| -@@ -55,7 +55,7 @@
|
| - %define WK_NUM 2
|
| + EXTN(jsimd_h2v1_upsample_mmx):
|
| + push ebp
|
| +@@ -643,7 +643,7 @@
|
| + %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_float_3dnow)
|
| -+ global EXTN(jsimd_fdct_float_3dnow) PRIVATE
|
| +- global EXTN(jsimd_h2v2_upsample_mmx)
|
| ++ global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_fdct_float_3dnow):
|
| + EXTN(jsimd_h2v2_upsample_mmx):
|
| push ebp
|
| +@@ -732,3 +732,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jdsamss2-64.asm
|
| ===================================================================
|
| --- simd/jdsamss2-64.asm (revision 829)
|
| +++ simd/jdsamss2-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jdsamss2.asm - upsampling (64-bit SSE2)
|
| ++; jdsamss2-64.asm - upsampling (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| @@ -23,7 +23,7 @@
|
| SECTION SEG_CONST
|
|
|
| @@ -1164,7 +13846,7 @@ Index: simd/jdsamss2-64.asm
|
|
|
| EXTN(jconst_fancy_upsample_sse2):
|
|
|
| -@@ -59,7 +59,7 @@
|
| +@@ -59,10 +59,11 @@
|
| ; r13 = JSAMPARRAY * output_data_ptr
|
|
|
| align 16
|
| @@ -1173,7 +13855,11 @@ Index: simd/jdsamss2-64.asm
|
|
|
| EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
| push rbp
|
| -@@ -201,7 +201,7 @@
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| + collect_args
|
| +
|
| +@@ -200,7 +201,7 @@
|
| %define WK_NUM 4
|
|
|
| align 16
|
| @@ -1182,7 +13868,27 @@ Index: simd/jdsamss2-64.asm
|
|
|
| EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
| push rbp
|
| -@@ -498,7 +498,7 @@
|
| +@@ -210,8 +211,8 @@
|
| + mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [wk(0)]
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + mov rax, r11 ; colctr
|
| + test rax,rax
|
| +@@ -472,8 +473,8 @@
|
| + jg near .rowloop
|
| +
|
| + .return:
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + mov rsp,rbp ; rsp <- aligned rbp
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| +@@ -497,10 +498,11 @@
|
| ; r13 = JSAMPARRAY * output_data_ptr
|
|
|
| align 16
|
| @@ -1191,7 +13897,11 @@ Index: simd/jdsamss2-64.asm
|
|
|
| EXTN(jsimd_h2v1_upsample_sse2):
|
| push rbp
|
| -@@ -587,7 +587,7 @@
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| + collect_args
|
| +
|
| +@@ -585,13 +587,14 @@
|
| ; r13 = JSAMPARRAY * output_data_ptr
|
|
|
| align 16
|
| @@ -1200,406 +13910,273 @@ Index: simd/jdsamss2-64.asm
|
|
|
| EXTN(jsimd_h2v2_upsample_sse2):
|
| push rbp
|
| -Index: simd/jcgrass2.asm
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + mov rdx, r11
|
| + add rdx, byte (2*SIZEOF_XMMWORD)-1
|
| +@@ -658,7 +661,11 @@
|
| + jg near .rowloop
|
| +
|
| + .return:
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jdsamss2.asm
|
| ===================================================================
|
| ---- simd/jcgrass2.asm (revision 829)
|
| -+++ simd/jcgrass2.asm (working copy)
|
| -@@ -30,7 +30,7 @@
|
| +--- simd/jdsamss2.asm (revision 829)
|
| ++++ simd/jdsamss2.asm (working copy)
|
| +@@ -22,7 +22,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_rgb_gray_convert_sse2)
|
| -+ global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE
|
| +- global EXTN(jconst_fancy_upsample_sse2)
|
| ++ global EXTN(jconst_fancy_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jconst_rgb_gray_convert_sse2):
|
| + EXTN(jconst_fancy_upsample_sse2):
|
|
|
| -Index: simd/jcsammmx.asm
|
| -===================================================================
|
| ---- simd/jcsammmx.asm (revision 829)
|
| -+++ simd/jcsammmx.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| - %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
| +@@ -58,7 +58,7 @@
|
| + %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v1_downsample_mmx)
|
| -+ global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE
|
| +- global EXTN(jsimd_h2v1_fancy_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_downsample_mmx):
|
| + EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
| push ebp
|
| -@@ -182,7 +182,7 @@
|
| - %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
| +@@ -214,7 +214,7 @@
|
| + %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v2_downsample_mmx)
|
| -+ global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE
|
| +- global EXTN(jsimd_h2v2_fancy_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_h2v2_downsample_mmx):
|
| + EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
| push ebp
|
| -+Index: simd/jsimd_arm.c
|
| -+===================================================================
|
| -+--- simd/jsimd_arm.c (revision 272637)
|
| -++++ simd/jsimd_arm.c (working copy)
|
| -+@@ -29,0 +29,0 @@
|
| -+
|
| -+ static unsigned int simd_support = ~0;
|
| -+
|
| -+-#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
| -++#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
|
| -+
|
| -+ #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
|
| -+
|
| -+@@ -100,6 +100,6 @@
|
| -+ init_simd (void)
|
| -+ {
|
| -+ char *env = NULL;
|
| -+-#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
| -++#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
|
| -+ int bufsize = 1024; /* an initial guess for the line buffer size limit */
|
| -+ #endif
|
| -+
|
| -Index: simd/jsimd_arm_neon.S
|
| -===================================================================
|
| ---- simd/jsimd_arm_neon.S (revision 272637)
|
| -+++ simd/jsimd_arm_neon.S (working copy)
|
| -@@ -41,11 +41,9 @@
|
| - /* Supplementary macro for setting function attributes */
|
| - .macro asm_function fname
|
| - #ifdef __APPLE__
|
| -- .func _\fname
|
| - .globl _\fname
|
| - _\fname:
|
| - #else
|
| -- .func \fname
|
| - .global \fname
|
| - #ifdef __ELF__
|
| - .hidden \fname
|
| -@@ -670,7 +668,6 @@
|
| - .unreq ROW6R
|
| - .unreq ROW7L
|
| - .unreq ROW7R
|
| --.endfunc
|
| -
|
| -
|
| - /*****************************************************************************/
|
| -@@ -895,7 +892,6 @@
|
| - .unreq TMP2
|
| - .unreq TMP3
|
| - .unreq TMP4
|
| --.endfunc
|
| -
|
| -
|
| - /*****************************************************************************/
|
| -@@ -1108,7 +1104,6 @@
|
| - .unreq TMP2
|
| - .unreq TMP3
|
| - .unreq TMP4
|
| --.endfunc
|
| -
|
| - .purgem idct_helper
|
| -
|
| -@@ -1263,7 +1258,6 @@
|
| - .unreq OUTPUT_COL
|
| - .unreq TMP1
|
| - .unreq TMP2
|
| --.endfunc
|
| -
|
| - .purgem idct_helper
|
| -
|
| -@@ -1547,7 +1541,6 @@
|
| - .unreq U
|
| - .unreq V
|
| - .unreq N
|
| --.endfunc
|
| -
|
| - .purgem do_yuv_to_rgb
|
| - .purgem do_yuv_to_rgb_stage1
|
| -@@ -1858,7 +1851,6 @@
|
| - .unreq U
|
| - .unreq V
|
| - .unreq N
|
| --.endfunc
|
| -
|
| - .purgem do_rgb_to_yuv
|
| - .purgem do_rgb_to_yuv_stage1
|
| -@@ -1940,7 +1932,6 @@
|
| - .unreq TMP2
|
| - .unreq TMP3
|
| - .unreq TMP4
|
| --.endfunc
|
| -
|
| -
|
| - /*****************************************************************************/
|
| -@@ -2064,7 +2055,6 @@
|
| -
|
| - .unreq DATA
|
| - .unreq TMP
|
| --.endfunc
|
| -
|
| -
|
| - /*****************************************************************************/
|
| -@@ -2166,7 +2156,6 @@
|
| - .unreq CORRECTION
|
| - .unreq SHIFT
|
| - .unreq LOOP_COUNT
|
| --.endfunc
|
| -
|
| -
|
| - /*****************************************************************************/
|
| -@@ -2401,7 +2390,6 @@
|
| - .unreq WIDTH
|
| - .unreq TMP
|
| -
|
| --.endfunc
|
| -
|
| - .purgem upsample16
|
| - .purgem upsample32
|
| -Index: simd/jsimd_i386.c
|
| -===================================================================
|
| ---- simd/jsimd_i386.c (revision 829)
|
| -+++ simd/jsimd_i386.c (working copy)
|
| -@@ -61,6 +61,7 @@
|
| - simd_support &= JSIMD_SSE2;
|
| - }
|
| -
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(int)
|
| - jsimd_can_rgb_ycc (void)
|
| - {
|
| -@@ -82,6 +83,7 @@
|
| -
|
| - return 0;
|
| - }
|
| -+#endif
|
| -
|
| - GLOBAL(int)
|
| - jsimd_can_rgb_gray (void)
|
| -@@ -127,6 +129,7 @@
|
| - return 0;
|
| - }
|
| -
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(void)
|
| - jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
| - JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| -@@ -179,6 +182,7 @@
|
| - mmxfct(cinfo->image_width, input_buf,
|
| - output_buf, output_row, num_rows);
|
| - }
|
| -+#endif
|
| +@@ -538,7 +538,7 @@
|
| + %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
|
|
| - GLOBAL(void)
|
| - jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
| -@@ -286,6 +290,7 @@
|
| - input_row, output_buf, num_rows);
|
| - }
|
| + align 16
|
| +- global EXTN(jsimd_h2v1_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE
|
|
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(int)
|
| - jsimd_can_h2v2_downsample (void)
|
| - {
|
| -@@ -351,6 +356,7 @@
|
| - compptr->v_samp_factor, compptr->width_in_blocks,
|
| - input_data, output_data);
|
| - }
|
| -+#endif
|
| + EXTN(jsimd_h2v1_upsample_sse2):
|
| + push ebp
|
| +@@ -637,7 +637,7 @@
|
| + %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr
|
|
|
| - GLOBAL(int)
|
| - jsimd_can_h2v2_upsample (void)
|
| -@@ -636,6 +642,7 @@
|
| - in_row_group_ctr, output_buf);
|
| - }
|
| + align 16
|
| +- global EXTN(jsimd_h2v2_upsample_sse2)
|
| ++ global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE
|
|
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(int)
|
| - jsimd_can_convsamp (void)
|
| - {
|
| -@@ -855,6 +862,7 @@
|
| - else if (simd_support & JSIMD_3DNOW)
|
| - jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
|
| - }
|
| -+#endif
|
| + EXTN(jsimd_h2v2_upsample_sse2):
|
| + push ebp
|
| +@@ -724,3 +724,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - GLOBAL(int)
|
| - jsimd_can_idct_2x2 (void)
|
| -@@ -1045,4 +1053,3 @@
|
| - jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
|
| - output_buf, output_col);
|
| - }
|
| --
|
| -Index: simd/jcqnts2f-64.asm
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jf3dnflt.asm
|
| ===================================================================
|
| ---- simd/jcqnts2f-64.asm (revision 829)
|
| -+++ simd/jcqnts2f-64.asm (working copy)
|
| -@@ -36,7 +36,7 @@
|
| - ; r12 = FAST_FLOAT * workspace
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_convsamp_float_sse2)
|
| -+ global EXTN(jsimd_convsamp_float_sse2) PRIVATE
|
| +--- simd/jf3dnflt.asm (revision 829)
|
| ++++ simd/jf3dnflt.asm (working copy)
|
| +@@ -27,7 +27,7 @@
|
| + SECTION SEG_CONST
|
|
|
| - EXTN(jsimd_convsamp_float_sse2):
|
| - push rbp
|
| -@@ -110,7 +110,7 @@
|
| - ; r12 = FAST_FLOAT * workspace
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_float_3dnow)
|
| ++ global EXTN(jconst_fdct_float_3dnow) PRIVATE
|
|
|
| - align 16
|
| -- global EXTN(jsimd_quantize_float_sse2)
|
| -+ global EXTN(jsimd_quantize_float_sse2) PRIVATE
|
| + EXTN(jconst_fdct_float_3dnow):
|
|
|
| - EXTN(jsimd_quantize_float_sse2):
|
| - push rbp
|
| -Index: simd/jcqnt3dn.asm
|
| -===================================================================
|
| ---- simd/jcqnt3dn.asm (revision 829)
|
| -+++ simd/jcqnt3dn.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - %define workspace ebp+16 ; FAST_FLOAT * workspace
|
| +@@ -55,7 +55,7 @@
|
| + %define WK_NUM 2
|
|
|
| align 16
|
| -- global EXTN(jsimd_convsamp_float_3dnow)
|
| -+ global EXTN(jsimd_convsamp_float_3dnow) PRIVATE
|
| +- global EXTN(jsimd_fdct_float_3dnow)
|
| ++ global EXTN(jsimd_fdct_float_3dnow) PRIVATE
|
|
|
| - EXTN(jsimd_convsamp_float_3dnow):
|
| + EXTN(jsimd_fdct_float_3dnow):
|
| push ebp
|
| -@@ -138,7 +138,7 @@
|
| - %define workspace ebp+16 ; FAST_FLOAT * workspace
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_quantize_float_3dnow)
|
| -+ global EXTN(jsimd_quantize_float_3dnow) PRIVATE
|
| +@@ -315,3 +315,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - EXTN(jsimd_quantize_float_3dnow):
|
| - push ebp
|
| -Index: simd/jcsamss2.asm
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfmmxfst.asm
|
| ===================================================================
|
| ---- simd/jcsamss2.asm (revision 829)
|
| -+++ simd/jcsamss2.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| - %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
| +--- simd/jfmmxfst.asm (revision 829)
|
| ++++ simd/jfmmxfst.asm (working copy)
|
| +@@ -52,7 +52,7 @@
|
| + %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
| - align 16
|
| -- global EXTN(jsimd_h2v1_downsample_sse2)
|
| -+ global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_ifast_mmx)
|
| ++ global EXTN(jconst_fdct_ifast_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_h2v1_downsample_sse2):
|
| - push ebp
|
| -@@ -195,7 +195,7 @@
|
| - %define output_data(b) (b)+28 ; JSAMPARRAY output_data
|
| + EXTN(jconst_fdct_ifast_mmx):
|
| +
|
| +@@ -80,7 +80,7 @@
|
| + %define WK_NUM 2
|
|
|
| align 16
|
| -- global EXTN(jsimd_h2v2_downsample_sse2)
|
| -+ global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE
|
| +- global EXTN(jsimd_fdct_ifast_mmx)
|
| ++ global EXTN(jsimd_fdct_ifast_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_h2v2_downsample_sse2):
|
| + EXTN(jsimd_fdct_ifast_mmx):
|
| push ebp
|
| -Index: simd/jsimd_x86_64.c
|
| +@@ -392,3 +392,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfmmxint.asm
|
| ===================================================================
|
| ---- simd/jsimd_x86_64.c (revision 829)
|
| -+++ simd/jsimd_x86_64.c (working copy)
|
| -@@ -29,6 +29,7 @@
|
| +--- simd/jfmmxint.asm (revision 829)
|
| ++++ simd/jfmmxint.asm (working copy)
|
| +@@ -66,7 +66,7 @@
|
| + SECTION SEG_CONST
|
|
|
| - #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_islow_mmx)
|
| ++ global EXTN(jconst_fdct_islow_mmx) PRIVATE
|
|
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(int)
|
| - jsimd_can_rgb_ycc (void)
|
| - {
|
| -@@ -45,6 +46,7 @@
|
| + EXTN(jconst_fdct_islow_mmx):
|
|
|
| - return 1;
|
| - }
|
| -+#endif
|
| +@@ -101,7 +101,7 @@
|
| + %define WK_NUM 2
|
|
|
| - GLOBAL(int)
|
| - jsimd_can_rgb_gray (void)
|
| -@@ -80,6 +82,7 @@
|
| - return 1;
|
| - }
|
| + align 16
|
| +- global EXTN(jsimd_fdct_islow_mmx)
|
| ++ global EXTN(jsimd_fdct_islow_mmx) PRIVATE
|
|
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(void)
|
| - jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
| - JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| -@@ -118,6 +121,7 @@
|
| + EXTN(jsimd_fdct_islow_mmx):
|
| + push ebp
|
| +@@ -617,3 +617,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
|
| - }
|
| -+#endif
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfss2fst-64.asm
|
| +===================================================================
|
| +--- simd/jfss2fst-64.asm (revision 829)
|
| ++++ simd/jfss2fst-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jfss2fst.asm - fast integer FDCT (64-bit SSE2)
|
| ++; jfss2fst-64.asm - fast integer FDCT (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -53,7 +53,7 @@
|
| + %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
| - GLOBAL(void)
|
| - jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
| -@@ -197,6 +201,7 @@
|
| - sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
| - }
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_ifast_sse2)
|
| ++ global EXTN(jconst_fdct_ifast_sse2) PRIVATE
|
|
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(int)
|
| - jsimd_can_h2v2_downsample (void)
|
| - {
|
| -@@ -242,6 +247,7 @@
|
| - compptr->width_in_blocks,
|
| - input_data, output_data);
|
| - }
|
| -+#endif
|
| + EXTN(jconst_fdct_ifast_sse2):
|
|
|
| - GLOBAL(int)
|
| - jsimd_can_h2v2_upsample (void)
|
| -@@ -451,6 +457,7 @@
|
| - sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
|
| - }
|
| +@@ -80,7 +80,7 @@
|
| + %define WK_NUM 2
|
|
|
| -+#ifndef JPEG_DECODE_ONLY
|
| - GLOBAL(int)
|
| - jsimd_can_convsamp (void)
|
| - {
|
| -@@ -601,6 +608,7 @@
|
| - {
|
| - jsimd_quantize_float_sse2(coef_block, divisors, workspace);
|
| - }
|
| -+#endif
|
| + align 16
|
| +- global EXTN(jsimd_fdct_ifast_sse2)
|
| ++ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE
|
|
|
| - GLOBAL(int)
|
| - jsimd_can_idct_2x2 (void)
|
| -@@ -750,4 +758,3 @@
|
| - jsimd_idct_float_sse2(compptr->dct_table, coef_block,
|
| - output_buf, output_col);
|
| - }
|
| --
|
| -Index: simd/jimmxint.asm
|
| + EXTN(jsimd_fdct_ifast_sse2):
|
| + push rbp
|
| +@@ -386,3 +386,7 @@
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfss2fst.asm
|
| ===================================================================
|
| ---- simd/jimmxint.asm (revision 829)
|
| -+++ simd/jimmxint.asm (working copy)
|
| -@@ -66,7 +66,7 @@
|
| - SECTION SEG_CONST
|
| +--- simd/jfss2fst.asm (revision 829)
|
| ++++ simd/jfss2fst.asm (working copy)
|
| +@@ -52,7 +52,7 @@
|
| + %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
| alignz 16
|
| -- global EXTN(jconst_idct_islow_mmx)
|
| -+ global EXTN(jconst_idct_islow_mmx) PRIVATE
|
| +- global EXTN(jconst_fdct_ifast_sse2)
|
| ++ global EXTN(jconst_fdct_ifast_sse2) PRIVATE
|
|
|
| - EXTN(jconst_idct_islow_mmx):
|
| + EXTN(jconst_fdct_ifast_sse2):
|
|
|
| -@@ -107,7 +107,7 @@
|
| - ; JCOEF workspace[DCTSIZE2]
|
| +@@ -80,7 +80,7 @@
|
| + %define WK_NUM 2
|
|
|
| align 16
|
| -- global EXTN(jsimd_idct_islow_mmx)
|
| -+ global EXTN(jsimd_idct_islow_mmx) PRIVATE
|
| +- global EXTN(jsimd_fdct_ifast_sse2)
|
| ++ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_idct_islow_mmx):
|
| + EXTN(jsimd_fdct_ifast_sse2):
|
| push ebp
|
| -Index: simd/jcgrymmx.asm
|
| +@@ -399,3 +399,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfss2int-64.asm
|
| ===================================================================
|
| ---- simd/jcgrymmx.asm (revision 829)
|
| -+++ simd/jcgrymmx.asm (working copy)
|
| -@@ -41,7 +41,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| +--- simd/jfss2int-64.asm (revision 829)
|
| ++++ simd/jfss2int-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jfss2int.asm - accurate integer FDCT (64-bit SSE2)
|
| ++; jfss2int-64.asm - accurate integer FDCT (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -67,7 +67,7 @@
|
| + SECTION SEG_CONST
|
| +
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_islow_sse2)
|
| ++ global EXTN(jconst_fdct_islow_sse2) PRIVATE
|
| +
|
| + EXTN(jconst_fdct_islow_sse2):
|
| +
|
| +@@ -101,7 +101,7 @@
|
| + %define WK_NUM 6
|
|
|
| align 16
|
| -- global EXTN(jsimd_rgb_gray_convert_mmx)
|
| -+ global EXTN(jsimd_rgb_gray_convert_mmx) PRIVATE
|
| +- global EXTN(jsimd_fdct_islow_sse2)
|
| ++ global EXTN(jsimd_fdct_islow_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_rgb_gray_convert_mmx):
|
| - push ebp
|
| + EXTN(jsimd_fdct_islow_sse2):
|
| + push rbp
|
| +@@ -616,3 +616,7 @@
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jfss2int.asm
|
| ===================================================================
|
| --- simd/jfss2int.asm (revision 829)
|
| @@ -1622,32 +14199,166 @@ Index: simd/jfss2int.asm
|
|
|
| EXTN(jsimd_fdct_islow_sse2):
|
| push ebp
|
| -Index: simd/jcgryss2.asm
|
| +@@ -629,3 +629,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfsseflt-64.asm
|
| +===================================================================
|
| +--- simd/jfsseflt-64.asm (revision 829)
|
| ++++ simd/jfsseflt-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jfsseflt.asm - floating-point FDCT (64-bit SSE)
|
| ++; jfsseflt-64.asm - floating-point FDCT (64-bit SSE)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -38,7 +38,7 @@
|
| + SECTION SEG_CONST
|
| +
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_float_sse)
|
| ++ global EXTN(jconst_fdct_float_sse) PRIVATE
|
| +
|
| + EXTN(jconst_fdct_float_sse):
|
| +
|
| +@@ -65,7 +65,7 @@
|
| + %define WK_NUM 2
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_fdct_float_sse)
|
| ++ global EXTN(jsimd_fdct_float_sse) PRIVATE
|
| +
|
| + EXTN(jsimd_fdct_float_sse):
|
| + push rbp
|
| +@@ -352,3 +352,7 @@
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jfsseflt.asm
|
| +===================================================================
|
| +--- simd/jfsseflt.asm (revision 829)
|
| ++++ simd/jfsseflt.asm (working copy)
|
| +@@ -37,7 +37,7 @@
|
| + SECTION SEG_CONST
|
| +
|
| + alignz 16
|
| +- global EXTN(jconst_fdct_float_sse)
|
| ++ global EXTN(jconst_fdct_float_sse) PRIVATE
|
| +
|
| + EXTN(jconst_fdct_float_sse):
|
| +
|
| +@@ -65,7 +65,7 @@
|
| + %define WK_NUM 2
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_fdct_float_sse)
|
| ++ global EXTN(jsimd_fdct_float_sse) PRIVATE
|
| +
|
| + EXTN(jsimd_fdct_float_sse):
|
| + push ebp
|
| +@@ -365,3 +365,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/ji3dnflt.asm
|
| ===================================================================
|
| ---- simd/jcgryss2.asm (revision 829)
|
| -+++ simd/jcgryss2.asm (working copy)
|
| -@@ -39,7 +39,7 @@
|
| +--- simd/ji3dnflt.asm (revision 829)
|
| ++++ simd/ji3dnflt.asm (working copy)
|
| +@@ -27,7 +27,7 @@
|
| + SECTION SEG_CONST
|
| +
|
| + alignz 16
|
| +- global EXTN(jconst_idct_float_3dnow)
|
| ++ global EXTN(jconst_idct_float_3dnow) PRIVATE
|
| +
|
| + EXTN(jconst_idct_float_3dnow):
|
| +
|
| +@@ -63,7 +63,7 @@
|
| + ; FAST_FLOAT workspace[DCTSIZE2]
|
|
|
| align 16
|
| +- global EXTN(jsimd_idct_float_3dnow)
|
| ++ global EXTN(jsimd_idct_float_3dnow) PRIVATE
|
| +
|
| + EXTN(jsimd_idct_float_3dnow):
|
| + push ebp
|
| +@@ -447,3 +447,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jimmxfst.asm
|
| +===================================================================
|
| +--- simd/jimmxfst.asm (revision 829)
|
| ++++ simd/jimmxfst.asm (working copy)
|
| +@@ -59,7 +59,7 @@
|
| + %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| +
|
| + alignz 16
|
| +- global EXTN(jconst_idct_ifast_mmx)
|
| ++ global EXTN(jconst_idct_ifast_mmx) PRIVATE
|
| +
|
| + EXTN(jconst_idct_ifast_mmx):
|
|
|
| -- global EXTN(jsimd_rgb_gray_convert_sse2)
|
| -+ global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE
|
| +@@ -94,7 +94,7 @@
|
| + ; JCOEF workspace[DCTSIZE2]
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_idct_ifast_mmx)
|
| ++ global EXTN(jsimd_idct_ifast_mmx) PRIVATE
|
|
|
| - EXTN(jsimd_rgb_gray_convert_sse2):
|
| + EXTN(jsimd_idct_ifast_mmx):
|
| push ebp
|
| -Index: simd/jccolmmx.asm
|
| +@@ -495,3 +495,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jimmxint.asm
|
| ===================================================================
|
| ---- simd/jccolmmx.asm (revision 829)
|
| -+++ simd/jccolmmx.asm (working copy)
|
| -@@ -37,7 +37,7 @@
|
| +--- simd/jimmxint.asm (revision 829)
|
| ++++ simd/jimmxint.asm (working copy)
|
| +@@ -66,7 +66,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_rgb_ycc_convert_mmx)
|
| -+ global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE
|
| +- global EXTN(jconst_idct_islow_mmx)
|
| ++ global EXTN(jconst_idct_islow_mmx) PRIVATE
|
|
|
| - EXTN(jconst_rgb_ycc_convert_mmx):
|
| + EXTN(jconst_idct_islow_mmx):
|
| +
|
| +@@ -107,7 +107,7 @@
|
| + ; JCOEF workspace[DCTSIZE2]
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_idct_islow_mmx)
|
| ++ global EXTN(jsimd_idct_islow_mmx) PRIVATE
|
| +
|
| + EXTN(jsimd_idct_islow_mmx):
|
| + push ebp
|
| +@@ -847,3 +847,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jimmxred.asm
|
| ===================================================================
|
| --- simd/jimmxred.asm (revision 829)
|
| @@ -1679,144 +14390,186 @@ Index: simd/jimmxred.asm
|
|
|
| EXTN(jsimd_idct_2x2_mmx):
|
| push ebp
|
| -Index: simd/jsimdext.inc
|
| -===================================================================
|
| ---- simd/jsimdext.inc (revision 829)
|
| -+++ simd/jsimdext.inc (working copy)
|
| -@@ -73,6 +73,9 @@
|
| - ; * *BSD family Unix using elf format
|
| - ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
|
| -
|
| -+; PIC is the default on Linux
|
| -+%define PIC
|
| -+
|
| - ; mark stack as non-executable
|
| - section .note.GNU-stack noalloc noexec nowrite progbits
|
| -
|
| -@@ -375,4 +378,14 @@
|
| - ;
|
| - %include "jsimdcfg.inc"
|
| -
|
| -+; Begin chromium edits
|
| -+%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
|
| -+%define PRIVATE :private_extern
|
| -+%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
|
| -+%define PRIVATE :hidden
|
| -+%else
|
| -+%define PRIVATE
|
| -+%endif
|
| -+; End chromium edits
|
| -+
|
| - ; --------------------------------------------------------------------------
|
| -Index: simd/jdclrmmx.asm
|
| -===================================================================
|
| ---- simd/jdclrmmx.asm (revision 829)
|
| -+++ simd/jdclrmmx.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_ycc_rgb_convert_mmx)
|
| -+ global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE
|
| +@@ -701,3 +701,6 @@
|
| + pop ebp
|
| + ret
|
|
|
| - EXTN(jsimd_ycc_rgb_convert_mmx):
|
| - push ebp
|
| -Index: simd/jccolss2.asm
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2flt-64.asm
|
| ===================================================================
|
| ---- simd/jccolss2.asm (revision 829)
|
| -+++ simd/jccolss2.asm (working copy)
|
| -@@ -34,7 +34,7 @@
|
| +--- simd/jiss2flt-64.asm (revision 829)
|
| ++++ simd/jiss2flt-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jiss2flt.asm - floating-point IDCT (64-bit SSE & SSE2)
|
| ++; jiss2flt-64.asm - floating-point IDCT (64-bit SSE & SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -38,7 +38,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_rgb_ycc_convert_sse2)
|
| -+ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE
|
| +- global EXTN(jconst_idct_float_sse2)
|
| ++ global EXTN(jconst_idct_float_sse2) PRIVATE
|
|
|
| - EXTN(jconst_rgb_ycc_convert_sse2):
|
| + EXTN(jconst_idct_float_sse2):
|
|
|
| -Index: simd/jisseflt.asm
|
| +@@ -74,7 +74,7 @@
|
| + ; FAST_FLOAT workspace[DCTSIZE2]
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_idct_float_sse2)
|
| ++ global EXTN(jsimd_idct_float_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_idct_float_sse2):
|
| + push rbp
|
| +@@ -81,11 +81,11 @@
|
| + mov rax,rsp ; rax = original rbp
|
| + sub rsp, byte 4
|
| + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
| +- mov [rsp],eax
|
| ++ mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [workspace]
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + ; ---- Pass 1: process columns from input, store into work array.
|
| +
|
| +@@ -471,9 +471,13 @@
|
| + dec rcx ; ctr
|
| + jnz near .rowloop
|
| +
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + mov rsp,rbp ; rsp <- aligned rbp
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2flt.asm
|
| ===================================================================
|
| ---- simd/jisseflt.asm (revision 829)
|
| -+++ simd/jisseflt.asm (working copy)
|
| +--- simd/jiss2flt.asm (revision 829)
|
| ++++ simd/jiss2flt.asm (working copy)
|
| @@ -37,7 +37,7 @@
|
| SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_idct_float_sse)
|
| -+ global EXTN(jconst_idct_float_sse) PRIVATE
|
| +- global EXTN(jconst_idct_float_sse2)
|
| ++ global EXTN(jconst_idct_float_sse2) PRIVATE
|
|
|
| - EXTN(jconst_idct_float_sse):
|
| + EXTN(jconst_idct_float_sse2):
|
|
|
| @@ -73,7 +73,7 @@
|
| ; FAST_FLOAT workspace[DCTSIZE2]
|
|
|
| align 16
|
| -- global EXTN(jsimd_idct_float_sse)
|
| -+ global EXTN(jsimd_idct_float_sse) PRIVATE
|
| +- global EXTN(jsimd_idct_float_sse2)
|
| ++ global EXTN(jsimd_idct_float_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_idct_float_sse):
|
| + EXTN(jsimd_idct_float_sse2):
|
| push ebp
|
| -Index: simd/jcqnts2i-64.asm
|
| +@@ -493,3 +493,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2fst-64.asm
|
| ===================================================================
|
| ---- simd/jcqnts2i-64.asm (revision 829)
|
| -+++ simd/jcqnts2i-64.asm (working copy)
|
| -@@ -36,7 +36,7 @@
|
| - ; r12 = DCTELEM * workspace
|
| +--- simd/jiss2fst-64.asm (revision 829)
|
| ++++ simd/jiss2fst-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jiss2fst.asm - fast integer IDCT (64-bit SSE2)
|
| ++; jiss2fst-64.asm - fast integer IDCT (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -60,7 +60,7 @@
|
| + %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
| - align 16
|
| -- global EXTN(jsimd_convsamp_sse2)
|
| -+ global EXTN(jsimd_convsamp_sse2) PRIVATE
|
| + alignz 16
|
| +- global EXTN(jconst_idct_ifast_sse2)
|
| ++ global EXTN(jconst_idct_ifast_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_convsamp_sse2):
|
| - push rbp
|
| -@@ -112,7 +112,7 @@
|
| - ; r12 = DCTELEM * workspace
|
| + EXTN(jconst_idct_ifast_sse2):
|
| +
|
| +@@ -93,7 +93,7 @@
|
| + %define WK_NUM 2
|
|
|
| align 16
|
| -- global EXTN(jsimd_quantize_sse2)
|
| -+ global EXTN(jsimd_quantize_sse2) PRIVATE
|
| +- global EXTN(jsimd_idct_ifast_sse2)
|
| ++ global EXTN(jsimd_idct_ifast_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_quantize_sse2):
|
| + EXTN(jsimd_idct_ifast_sse2):
|
| push rbp
|
| -Index: simd/jdclrss2.asm
|
| +@@ -100,7 +100,7 @@
|
| + mov rax,rsp ; rax = original rbp
|
| + sub rsp, byte 4
|
| + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
| +- mov [rsp],eax
|
| ++ mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [wk(0)]
|
| + collect_args
|
| +@@ -486,3 +486,7 @@
|
| + pop rbp
|
| + ret
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2fst.asm
|
| ===================================================================
|
| ---- simd/jdclrss2.asm (revision 829)
|
| -+++ simd/jdclrss2.asm (working copy)
|
| -@@ -40,7 +40,7 @@
|
| - %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
| -
|
| - align 16
|
| -- global EXTN(jsimd_ycc_rgb_convert_sse2)
|
| -+ global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE
|
| +--- simd/jiss2fst.asm (revision 829)
|
| ++++ simd/jiss2fst.asm (working copy)
|
| +@@ -59,7 +59,7 @@
|
| + %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
| - EXTN(jsimd_ycc_rgb_convert_sse2):
|
| - push ebp
|
| -Index: simd/jcqntsse.asm
|
| -===================================================================
|
| ---- simd/jcqntsse.asm (revision 829)
|
| -+++ simd/jcqntsse.asm (working copy)
|
| -@@ -35,7 +35,7 @@
|
| - %define workspace ebp+16 ; FAST_FLOAT * workspace
|
| + alignz 16
|
| +- global EXTN(jconst_idct_ifast_sse2)
|
| ++ global EXTN(jconst_idct_ifast_sse2) PRIVATE
|
|
|
| - align 16
|
| -- global EXTN(jsimd_convsamp_float_sse)
|
| -+ global EXTN(jsimd_convsamp_float_sse) PRIVATE
|
| + EXTN(jconst_idct_ifast_sse2):
|
|
|
| - EXTN(jsimd_convsamp_float_sse):
|
| - push ebp
|
| -@@ -138,7 +138,7 @@
|
| - %define workspace ebp+16 ; FAST_FLOAT * workspace
|
| +@@ -92,7 +92,7 @@
|
| + %define WK_NUM 2
|
|
|
| align 16
|
| -- global EXTN(jsimd_quantize_float_sse)
|
| -+ global EXTN(jsimd_quantize_float_sse) PRIVATE
|
| +- global EXTN(jsimd_idct_ifast_sse2)
|
| ++ global EXTN(jsimd_idct_ifast_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_quantize_float_sse):
|
| + EXTN(jsimd_idct_ifast_sse2):
|
| push ebp
|
| +@@ -497,3 +497,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| Index: simd/jiss2int-64.asm
|
| ===================================================================
|
| --- simd/jiss2int-64.asm (revision 829)
|
| +++ simd/jiss2int-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jiss2int.asm - accurate integer IDCT (64-bit SSE2)
|
| ++; jiss2int-64.asm - accurate integer IDCT (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| @@ -67,7 +67,7 @@
|
| SECTION SEG_CONST
|
|
|
| @@ -1835,279 +14588,507 @@ Index: simd/jiss2int-64.asm
|
|
|
| EXTN(jsimd_idct_islow_sse2):
|
| push rbp
|
| -Index: simd/jfmmxfst.asm
|
| +@@ -842,3 +842,7 @@
|
| + pop rsp ; rsp <- original rbp
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2int.asm
|
| ===================================================================
|
| ---- simd/jfmmxfst.asm (revision 829)
|
| -+++ simd/jfmmxfst.asm (working copy)
|
| -@@ -52,7 +52,7 @@
|
| - %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
| +--- simd/jiss2int.asm (revision 829)
|
| ++++ simd/jiss2int.asm (working copy)
|
| +@@ -66,7 +66,7 @@
|
| + SECTION SEG_CONST
|
|
|
| alignz 16
|
| -- global EXTN(jconst_fdct_ifast_mmx)
|
| -+ global EXTN(jconst_fdct_ifast_mmx) PRIVATE
|
| +- global EXTN(jconst_idct_islow_sse2)
|
| ++ global EXTN(jconst_idct_islow_sse2) PRIVATE
|
|
|
| - EXTN(jconst_fdct_ifast_mmx):
|
| + EXTN(jconst_idct_islow_sse2):
|
|
|
| -@@ -80,7 +80,7 @@
|
| - %define WK_NUM 2
|
| +@@ -105,7 +105,7 @@
|
| + %define WK_NUM 12
|
|
|
| align 16
|
| -- global EXTN(jsimd_fdct_ifast_mmx)
|
| -+ global EXTN(jsimd_fdct_ifast_mmx) PRIVATE
|
| +- global EXTN(jsimd_idct_islow_sse2)
|
| ++ global EXTN(jsimd_idct_islow_sse2) PRIVATE
|
|
|
| - EXTN(jsimd_fdct_ifast_mmx):
|
| + EXTN(jsimd_idct_islow_sse2):
|
| push ebp
|
| -Index: jdarith.c
|
| -===================================================================
|
| ---- jdarith.c (revision 829)
|
| -+++ jdarith.c (working copy)
|
| -@@ -150,8 +150,8 @@
|
| - */
|
| - sv = *st;
|
| - qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */
|
| -- nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */
|
| -- nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */
|
| -+ nl = (unsigned char) qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */
|
| -+ nm = (unsigned char) qe & 0xFF; qe >>= 8; /* Next_Index_MPS */
|
| -
|
| - /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
|
| - temp = e->a - qe;
|
| -Index: jdhuff.c
|
| +@@ -854,3 +854,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2red-64.asm
|
| ===================================================================
|
| ---- jdhuff.c (revision 1541)
|
| -+++ jdhuff.c (working copy)
|
| -@@ -662,7 +662,7 @@
|
| - d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
|
| - register int s, k, r, l;
|
| +--- simd/jiss2red-64.asm (revision 829)
|
| ++++ simd/jiss2red-64.asm (working copy)
|
| +@@ -1,5 +1,5 @@
|
| + ;
|
| +-; jiss2red.asm - reduced-size IDCT (64-bit SSE2)
|
| ++; jiss2red-64.asm - reduced-size IDCT (64-bit SSE2)
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| + ; Copyright 2009 D. R. Commander
|
| +@@ -73,7 +73,7 @@
|
| + SECTION SEG_CONST
|
|
|
| -- HUFF_DECODE_FAST(s, l, dctbl);
|
| -+ HUFF_DECODE_FAST(s, l, dctbl, slow_decode_mcu);
|
| - if (s) {
|
| - FILL_BIT_BUFFER_FAST
|
| - r = GET_BITS(s);
|
| -@@ -679,7 +679,7 @@
|
| - if (entropy->ac_needed[blkn]) {
|
| -
|
| - for (k = 1; k < DCTSIZE2; k++) {
|
| -- HUFF_DECODE_FAST(s, l, actbl);
|
| -+ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu);
|
| - r = s >> 4;
|
| - s &= 15;
|
| -
|
| -@@ -698,7 +698,7 @@
|
| - } else {
|
| + alignz 16
|
| +- global EXTN(jconst_idct_red_sse2)
|
| ++ global EXTN(jconst_idct_red_sse2) PRIVATE
|
|
|
| - for (k = 1; k < DCTSIZE2; k++) {
|
| -- HUFF_DECODE_FAST(s, l, actbl);
|
| -+ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu);
|
| - r = s >> 4;
|
| - s &= 15;
|
| + EXTN(jconst_idct_red_sse2):
|
|
|
| -@@ -715,6 +715,7 @@
|
| - }
|
| +@@ -114,7 +114,7 @@
|
| + %define WK_NUM 2
|
|
|
| - if (cinfo->unread_marker != 0) {
|
| -+slow_decode_mcu:
|
| - cinfo->unread_marker = 0;
|
| - return FALSE;
|
| - }
|
| -@@ -742,7 +743,7 @@
|
| - * this module, since we'll just re-assign them on the next call.)
|
| - */
|
| + align 16
|
| +- global EXTN(jsimd_idct_4x4_sse2)
|
| ++ global EXTN(jsimd_idct_4x4_sse2) PRIVATE
|
|
|
| --#define BUFSIZE (DCTSIZE2 * 2)
|
| -+#define BUFSIZE (DCTSIZE2 * 2u)
|
| + EXTN(jsimd_idct_4x4_sse2):
|
| + push rbp
|
| +@@ -121,7 +121,7 @@
|
| + mov rax,rsp ; rax = original rbp
|
| + sub rsp, byte 4
|
| + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
| +- mov [rsp],eax
|
| ++ mov [rsp],rax
|
| + mov rbp,rsp ; rbp = aligned rbp
|
| + lea rsp, [wk(0)]
|
| + collect_args
|
| +@@ -413,13 +413,14 @@
|
| + ; r13 = JDIMENSION output_col
|
|
|
| - METHODDEF(boolean)
|
| - decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
| -Index: jdhuff.h
|
| + align 16
|
| +- global EXTN(jsimd_idct_2x2_sse2)
|
| ++ global EXTN(jsimd_idct_2x2_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_idct_2x2_sse2):
|
| + push rbp
|
| ++ mov rax,rsp
|
| + mov rbp,rsp
|
| ++ collect_args
|
| + push rbx
|
| +- collect_args
|
| +
|
| + ; ---- Pass 1: process columns from input.
|
| +
|
| +@@ -565,7 +566,11 @@
|
| + mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
|
| + mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx
|
| +
|
| ++ pop rbx
|
| + uncollect_args
|
| +- pop rbx
|
| + pop rbp
|
| + ret
|
| ++
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jiss2red.asm
|
| ===================================================================
|
| ---- jdhuff.h (revision 1541)
|
| -+++ jdhuff.h (working copy)
|
| -@@ -208,7 +208,7 @@
|
| - } \
|
| - }
|
| +--- simd/jiss2red.asm (revision 829)
|
| ++++ simd/jiss2red.asm (working copy)
|
| +@@ -72,7 +72,7 @@
|
| + SECTION SEG_CONST
|
|
|
| --#define HUFF_DECODE_FAST(s,nb,htbl) \
|
| -+#define HUFF_DECODE_FAST(s,nb,htbl,slowlabel) \
|
| - FILL_BIT_BUFFER_FAST; \
|
| - s = PEEK_BITS(HUFF_LOOKAHEAD); \
|
| - s = htbl->lookup[s]; \
|
| -@@ -225,7 +225,9 @@
|
| - s |= GET_BITS(1); \
|
| - nb++; \
|
| - } \
|
| -- s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \
|
| -+ if (nb > 16) \
|
| -+ goto slowlabel; \
|
| -+ s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) ]; \
|
| - }
|
| + alignz 16
|
| +- global EXTN(jconst_idct_red_sse2)
|
| ++ global EXTN(jconst_idct_red_sse2) PRIVATE
|
|
|
| - /* Out-of-line case for Huffman code fetching */
|
| + EXTN(jconst_idct_red_sse2):
|
|
|
| -Index: jchuff.c
|
| -===================================================================
|
| ---- jchuff.c (revision 1219)
|
| -+++ jchuff.c (revision 1220)
|
| -@@ -22,8 +22,36 @@
|
| - #include "jchuff.h" /* Declarations shared with jcphuff.c */
|
| - #include <limits.h>
|
| +@@ -113,7 +113,7 @@
|
| + %define WK_NUM 2
|
|
|
| -+/*
|
| -+ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
| -+ * used for bit counting rather than the lookup table. This will reduce the
|
| -+ * memory footprint by 64k, which is important for some mobile applications
|
| -+ * that create many isolated instances of libjpeg-turbo (web browsers, for
|
| -+ * instance.) This may improve performance on some mobile platforms as well.
|
| -+ * This feature is enabled by default only on ARM processors, because some x86
|
| -+ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
| -+ * shown to have a significant performance impact even on the x86 chips that
|
| -+ * have a fast implementation of it. When building for ARMv6, you can
|
| -+ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
| -+ * flags (this defines __thumb__).
|
| -+ */
|
| -+
|
| -+/* NOTE: Both GCC and Clang define __GNUC__ */
|
| -+#if defined __GNUC__ && defined __arm__
|
| -+#if !defined __thumb__ || defined __thumb2__
|
| -+#define USE_CLZ_INTRINSIC
|
| -+#endif
|
| -+#endif
|
| -+
|
| -+#ifdef USE_CLZ_INTRINSIC
|
| -+#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
| -+#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
| -+#else
|
| - static unsigned char jpeg_nbits_table[65536];
|
| - static int jpeg_nbits_table_init = 0;
|
| -+#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
| -+#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
| -+#endif
|
| + align 16
|
| +- global EXTN(jsimd_idct_4x4_sse2)
|
| ++ global EXTN(jsimd_idct_4x4_sse2) PRIVATE
|
|
|
| - #ifndef min
|
| - #define min(a,b) ((a)<(b)?(a):(b))
|
| -@@ -272,6 +300,7 @@
|
| - dtbl->ehufsi[i] = huffsize[p];
|
| - }
|
| + EXTN(jsimd_idct_4x4_sse2):
|
| + push ebp
|
| +@@ -424,7 +424,7 @@
|
| + %define output_col(b) (b)+20 ; JDIMENSION output_col
|
| +
|
| + align 16
|
| +- global EXTN(jsimd_idct_2x2_sse2)
|
| ++ global EXTN(jsimd_idct_2x2_sse2) PRIVATE
|
| +
|
| + EXTN(jsimd_idct_2x2_sse2):
|
| + push ebp
|
| +@@ -589,3 +589,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jisseflt.asm
|
| +===================================================================
|
| +--- simd/jisseflt.asm (revision 829)
|
| ++++ simd/jisseflt.asm (working copy)
|
| +@@ -37,7 +37,7 @@
|
| + SECTION SEG_CONST
|
|
|
| -+#ifndef USE_CLZ_INTRINSIC
|
| - if(!jpeg_nbits_table_init) {
|
| - for(i = 0; i < 65536; i++) {
|
| - int nbits = 0, temp = i;
|
| -@@ -280,6 +309,7 @@
|
| - }
|
| - jpeg_nbits_table_init = 1;
|
| - }
|
| -+#endif
|
| - }
|
| + alignz 16
|
| +- global EXTN(jconst_idct_float_sse)
|
| ++ global EXTN(jconst_idct_float_sse) PRIVATE
|
|
|
| + EXTN(jconst_idct_float_sse):
|
|
|
| -@@ -482,7 +512,7 @@
|
| - temp2 += temp3;
|
| +@@ -73,7 +73,7 @@
|
| + ; FAST_FLOAT workspace[DCTSIZE2]
|
|
|
| - /* Find the number of bits needed for the magnitude of the coefficient */
|
| -- nbits = jpeg_nbits_table[temp];
|
| -+ nbits = JPEG_NBITS(temp);
|
| + align 16
|
| +- global EXTN(jsimd_idct_float_sse)
|
| ++ global EXTN(jsimd_idct_float_sse) PRIVATE
|
|
|
| - /* Emit the Huffman-coded symbol for the number of bits */
|
| - code = dctbl->ehufco[nbits];
|
| -@@ -516,7 +546,7 @@
|
| - temp ^= temp3; \
|
| - temp -= temp3; \
|
| - temp2 += temp3; \
|
| -- nbits = jpeg_nbits_table[temp]; \
|
| -+ nbits = JPEG_NBITS_NONZERO(temp); \
|
| - /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
|
| - while (r > 15) { \
|
| - EMIT_BITS(code_0xf0, size_0xf0) \
|
| -Index: simd/jsimd_arm64.c
|
| + EXTN(jsimd_idct_float_sse):
|
| + push ebp
|
| +@@ -567,3 +567,6 @@
|
| + pop ebp
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jsimd.h
|
| ===================================================================
|
| ---- /dev/null
|
| -+++ simd/jsimd_arm64.c
|
| -@@ -0,0 +1,544 @@
|
| -+/*
|
| -+ * jsimd_arm64.c
|
| -+ *
|
| -+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| -+ * Copyright 2009-2011, 2013-2014 D. R. Commander
|
| -+ *
|
| -+ * Based on the x86 SIMD extension for IJG JPEG library,
|
| -+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| +--- simd/jsimd.h (revision 829)
|
| ++++ simd/jsimd.h (working copy)
|
| +@@ -2,19 +2,22 @@
|
| + * simd/jsimd.h
|
| + *
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++ * Copyright 2011 D. R. Commander
|
| + *
|
| + * Based on the x86 SIMD extension for IJG JPEG library,
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| + * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| -+ *
|
| -+ * This file contains the interface between the "normal" portions
|
| -+ * of the library and the SIMD implementations when running on a
|
| -+ * 64-bit ARM architecture.
|
| -+ */
|
| -+
|
| -+#define JPEG_INTERNALS
|
| -+#include "../jinclude.h"
|
| -+#include "../jpeglib.h"
|
| -+#include "../jsimd.h"
|
| -+#include "../jdct.h"
|
| -+#include "../jsimddct.h"
|
| -+#include "jsimd.h"
|
| -+
|
| -+#include <stdio.h>
|
| -+#include <string.h>
|
| -+#include <ctype.h>
|
| -+
|
| -+static unsigned int simd_support = ~0;
|
| -+
|
| -+/*
|
| -+ * Check what SIMD accelerations are supported.
|
| -+ *
|
| -+ * FIXME: This code is racy under a multi-threaded environment.
|
| -+ */
|
| + *
|
| + */
|
| +
|
| + /* Bitmask for supported acceleration methods */
|
| +
|
| +-#define JSIMD_NONE 0x00
|
| +-#define JSIMD_MMX 0x01
|
| +-#define JSIMD_3DNOW 0x02
|
| +-#define JSIMD_SSE 0x04
|
| +-#define JSIMD_SSE2 0x08
|
| ++#define JSIMD_NONE 0x00
|
| ++#define JSIMD_MMX 0x01
|
| ++#define JSIMD_3DNOW 0x02
|
| ++#define JSIMD_SSE 0x04
|
| ++#define JSIMD_SSE2 0x08
|
| ++#define JSIMD_ARM_NEON 0x10
|
| +
|
| + /* Short forms of external names for systems with brain-damaged linkers. */
|
| +
|
| +@@ -27,6 +30,13 @@
|
| + #define jsimd_extbgrx_ycc_convert_mmx jSEXTBGRXYCCM
|
| + #define jsimd_extxbgr_ycc_convert_mmx jSEXTXBGRYCCM
|
| + #define jsimd_extxrgb_ycc_convert_mmx jSEXTXRGBYCCM
|
| ++#define jsimd_rgb_gray_convert_mmx jSRGBGRYM
|
| ++#define jsimd_extrgb_gray_convert_mmx jSEXTRGBGRYM
|
| ++#define jsimd_extrgbx_gray_convert_mmx jSEXTRGBXGRYM
|
| ++#define jsimd_extbgr_gray_convert_mmx jSEXTBGRGRYM
|
| ++#define jsimd_extbgrx_gray_convert_mmx jSEXTBGRXGRYM
|
| ++#define jsimd_extxbgr_gray_convert_mmx jSEXTXBGRGRYM
|
| ++#define jsimd_extxrgb_gray_convert_mmx jSEXTXRGBGRYM
|
| + #define jsimd_ycc_rgb_convert_mmx jSYCCRGBM
|
| + #define jsimd_ycc_extrgb_convert_mmx jSYCCEXTRGBM
|
| + #define jsimd_ycc_extrgbx_convert_mmx jSYCCEXTRGBXM
|
| +@@ -42,6 +52,14 @@
|
| + #define jsimd_extbgrx_ycc_convert_sse2 jSEXTBGRXYCCS2
|
| + #define jsimd_extxbgr_ycc_convert_sse2 jSEXTXBGRYCCS2
|
| + #define jsimd_extxrgb_ycc_convert_sse2 jSEXTXRGBYCCS2
|
| ++#define jconst_rgb_gray_convert_sse2 jSCRGBGRYS2
|
| ++#define jsimd_rgb_gray_convert_sse2 jSRGBGRYS2
|
| ++#define jsimd_extrgb_gray_convert_sse2 jSEXTRGBGRYS2
|
| ++#define jsimd_extrgbx_gray_convert_sse2 jSEXTRGBXGRYS2
|
| ++#define jsimd_extbgr_gray_convert_sse2 jSEXTBGRGRYS2
|
| ++#define jsimd_extbgrx_gray_convert_sse2 jSEXTBGRXGRYS2
|
| ++#define jsimd_extxbgr_gray_convert_sse2 jSEXTXBGRGRYS2
|
| ++#define jsimd_extxrgb_gray_convert_sse2 jSEXTXRGBGRYS2
|
| + #define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2
|
| + #define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2
|
| + #define jsimd_ycc_extrgb_convert_sse2 jSYCCEXTRGBS2
|
| +@@ -162,6 +180,35 @@
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| + JDIMENSION output_row, int num_rows));
|
| +
|
| ++EXTERN(void) jsimd_rgb_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extrgb_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extrgbx_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extbgr_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extbgrx_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extxbgr_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extxrgb_gray_convert_mmx
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| +
|
| -+/*
|
| -+ * ARMv8 architectures support NEON extensions by default.
|
| -+ * It is no longer optional as it was with ARMv7.
|
| -+ */
|
| + EXTERN(void) jsimd_ycc_rgb_convert_mmx
|
| + JPP((JDIMENSION out_width,
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| +@@ -221,6 +268,36 @@
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| + JDIMENSION output_row, int num_rows));
|
| +
|
| ++extern const int jconst_rgb_gray_convert_sse2[];
|
| ++EXTERN(void) jsimd_rgb_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extrgb_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extrgbx_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extbgr_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extbgrx_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extxbgr_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extxrgb_gray_convert_sse2
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| +
|
| + extern const int jconst_ycc_rgb_convert_sse2[];
|
| + EXTERN(void) jsimd_ycc_rgb_convert_sse2
|
| + JPP((JDIMENSION out_width,
|
| +@@ -251,6 +328,64 @@
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows));
|
| +
|
| ++EXTERN(void) jsimd_rgb_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extrgb_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extrgbx_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extbgr_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extbgrx_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extxbgr_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| ++EXTERN(void) jsimd_extxrgb_ycc_convert_neon
|
| ++ JPP((JDIMENSION img_width,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows));
|
| +
|
| -+LOCAL(void)
|
| -+init_simd (void)
|
| -+{
|
| -+ char *env = NULL;
|
| ++EXTERN(void) jsimd_ycc_rgb_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| ++EXTERN(void) jsimd_ycc_extrgb_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| ++EXTERN(void) jsimd_ycc_extrgbx_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| ++EXTERN(void) jsimd_ycc_extbgr_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| ++EXTERN(void) jsimd_ycc_extbgrx_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| ++EXTERN(void) jsimd_ycc_extxbgr_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| ++EXTERN(void) jsimd_ycc_extxrgb_convert_neon
|
| ++ JPP((JDIMENSION out_width,
|
| ++ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| ++ JSAMPARRAY output_buf, int num_rows));
|
| +
|
| -+ if (simd_support != ~0U)
|
| -+ return;
|
| + /* SIMD Downsample */
|
| + EXTERN(void) jsimd_h2v2_downsample_mmx
|
| + JPP((JDIMENSION image_width, int max_v_samp_factor,
|
| +@@ -387,6 +522,10 @@
|
| + JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
| + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf));
|
| +
|
| ++EXTERN(void) jsimd_h2v1_fancy_upsample_neon
|
| ++ JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
|
| ++ JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
| +
|
| -+ simd_support = 0;
|
| + /* SIMD Sample Conversion */
|
| + EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data,
|
| + JDIMENSION start_col,
|
| +@@ -396,6 +535,10 @@
|
| + JDIMENSION start_col,
|
| + DCTELEM * workspace));
|
| +
|
| ++EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data,
|
| ++ JDIMENSION start_col,
|
| ++ DCTELEM * workspace));
|
| +
|
| -+ simd_support |= JSIMD_ARM_NEON;
|
| + EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data,
|
| + JDIMENSION start_col,
|
| + FAST_FLOAT * workspace));
|
| +@@ -417,6 +560,8 @@
|
| + extern const int jconst_fdct_islow_sse2[];
|
| + EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data));
|
| +
|
| ++EXTERN(void) jsimd_fdct_ifast_neon JPP((DCTELEM * data));
|
| +
|
| -+ /* Force different settings through environment variables */
|
| -+ env = getenv("JSIMD_FORCENEON");
|
| -+ if ((env != NULL) && (strcmp(env, "1") == 0))
|
| -+ simd_support &= JSIMD_ARM_NEON;
|
| -+ env = getenv("JSIMD_FORCENONE");
|
| -+ if ((env != NULL) && (strcmp(env, "1") == 0))
|
| -+ simd_support = 0;
|
| -+}
|
| + EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));
|
| +
|
| + extern const int jconst_fdct_float_sse[];
|
| +@@ -431,6 +576,10 @@
|
| + DCTELEM * divisors,
|
| + DCTELEM * workspace));
|
| +
|
| ++EXTERN(void) jsimd_quantize_neon JPP((JCOEFPTR coef_block,
|
| ++ DCTELEM * divisors,
|
| ++ DCTELEM * workspace));
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_rgb_ycc (void)
|
| -+{
|
| -+ init_simd();
|
| + EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block,
|
| + FAST_FLOAT * divisors,
|
| + FAST_FLOAT * workspace));
|
| +@@ -463,6 +612,15 @@
|
| + JSAMPARRAY output_buf,
|
| + JDIMENSION output_col));
|
| +
|
| ++EXTERN(void) jsimd_idct_2x2_neon JPP((void * dct_table,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf,
|
| ++ JDIMENSION output_col));
|
| ++EXTERN(void) jsimd_idct_4x4_neon JPP((void * dct_table,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf,
|
| ++ JDIMENSION output_col));
|
| +
|
| -+ return 0;
|
| -+}
|
| + /* SIMD Inverse DCT */
|
| + EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table,
|
| + JCOEFPTR coef_block,
|
| +@@ -484,6 +642,15 @@
|
| + JSAMPARRAY output_buf,
|
| + JDIMENSION output_col));
|
| +
|
| ++EXTERN(void) jsimd_idct_islow_neon JPP((void * dct_table,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf,
|
| ++ JDIMENSION output_col));
|
| ++EXTERN(void) jsimd_idct_ifast_neon JPP((void * dct_table,
|
| ++ JCOEFPTR coef_block,
|
| ++ JSAMPARRAY output_buf,
|
| ++ JDIMENSION output_col));
|
| +
|
| -+GLOBAL(int)
|
| + EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table,
|
| + JCOEFPTR coef_block,
|
| + JSAMPARRAY output_buf,
|
| +Index: simd/jsimd_i386.c
|
| +===================================================================
|
| +--- simd/jsimd_i386.c (revision 829)
|
| ++++ simd/jsimd_i386.c (working copy)
|
| +@@ -2,10 +2,11 @@
|
| + * jsimd_i386.c
|
| + *
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * Copyright 2009 D. R. Commander
|
| ++ * Copyright 2009-2011 D. R. Commander
|
| + *
|
| + * Based on the x86 SIMD extension for IJG JPEG library,
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| ++ * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| + *
|
| + * This file contains the interface between the "normal" portions
|
| + * of the library and the SIMD implementations when running on a
|
| +@@ -40,7 +41,7 @@
|
| + {
|
| + char *env = NULL;
|
| +
|
| +- if (simd_support != ~0)
|
| ++ if (simd_support != ~0U)
|
| + return;
|
| +
|
| + simd_support = jpeg_simd_cpu_support();
|
| +@@ -51,15 +52,16 @@
|
| + simd_support &= JSIMD_MMX;
|
| + env = getenv("JSIMD_FORCE3DNOW");
|
| + if ((env != NULL) && (strcmp(env, "1") == 0))
|
| +- simd_support &= JSIMD_3DNOW;
|
| ++ simd_support &= JSIMD_3DNOW|JSIMD_MMX;
|
| + env = getenv("JSIMD_FORCESSE");
|
| + if ((env != NULL) && (strcmp(env, "1") == 0))
|
| +- simd_support &= JSIMD_SSE;
|
| ++ simd_support &= JSIMD_SSE|JSIMD_MMX;
|
| + env = getenv("JSIMD_FORCESSE2");
|
| + if ((env != NULL) && (strcmp(env, "1") == 0))
|
| + simd_support &= JSIMD_SSE2;
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(int)
|
| + jsimd_can_rgb_ycc (void)
|
| + {
|
| +@@ -81,8 +83,31 @@
|
| +
|
| + return 0;
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(int)
|
| +jsimd_can_rgb_gray (void)
|
| +{
|
| + init_simd();
|
| +
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_ycc_rgb (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| + /* The code is optimised for these values only */
|
| + if (BITS_IN_JSAMPLE != 8)
|
| + return 0;
|
| @@ -2116,2323 +15097,2031 @@ Index: simd/jsimd_arm64.c
|
| + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
| + return 0;
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| ++ if ((simd_support & JSIMD_SSE2) &&
|
| ++ IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
|
| ++ return 1;
|
| ++ if (simd_support & JSIMD_MMX)
|
| + return 1;
|
| +
|
| + return 0;
|
| +}
|
| +
|
| +GLOBAL(int)
|
| -+jsimd_can_ycc_rgb565 (void)
|
| + jsimd_can_ycc_rgb (void)
|
| + {
|
| + init_simd();
|
| +@@ -104,6 +129,7 @@
|
| + return 0;
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(void)
|
| + jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| +@@ -119,6 +145,7 @@
|
| + mmxfct=jsimd_extrgb_ycc_convert_mmx;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_extrgbx_ycc_convert_sse2;
|
| + mmxfct=jsimd_extrgbx_ycc_convert_mmx;
|
| + break;
|
| +@@ -127,14 +154,17 @@
|
| + mmxfct=jsimd_extbgr_ycc_convert_mmx;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_extbgrx_ycc_convert_sse2;
|
| + mmxfct=jsimd_extbgrx_ycc_convert_mmx;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_extxbgr_ycc_convert_sse2;
|
| + mmxfct=jsimd_extxbgr_ycc_convert_mmx;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_extxrgb_ycc_convert_sse2;
|
| + mmxfct=jsimd_extxrgb_ycc_convert_mmx;
|
| + break;
|
| +@@ -152,8 +182,62 @@
|
| + mmxfct(cinfo->image_width, input_buf,
|
| + output_buf, output_row, num_rows);
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(void)
|
| ++jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
| ++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| ++ JDIMENSION output_row, int num_rows)
|
| +{
|
| -+ init_simd();
|
| ++ void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
| ++ void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
| ++
|
| ++ switch(cinfo->in_color_space)
|
| ++ {
|
| ++ case JCS_EXT_RGB:
|
| ++ sse2fct=jsimd_extrgb_gray_convert_sse2;
|
| ++ mmxfct=jsimd_extrgb_gray_convert_mmx;
|
| ++ break;
|
| ++ case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| ++ sse2fct=jsimd_extrgbx_gray_convert_sse2;
|
| ++ mmxfct=jsimd_extrgbx_gray_convert_mmx;
|
| ++ break;
|
| ++ case JCS_EXT_BGR:
|
| ++ sse2fct=jsimd_extbgr_gray_convert_sse2;
|
| ++ mmxfct=jsimd_extbgr_gray_convert_mmx;
|
| ++ break;
|
| ++ case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| ++ sse2fct=jsimd_extbgrx_gray_convert_sse2;
|
| ++ mmxfct=jsimd_extbgrx_gray_convert_mmx;
|
| ++ break;
|
| ++ case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| ++ sse2fct=jsimd_extxbgr_gray_convert_sse2;
|
| ++ mmxfct=jsimd_extxbgr_gray_convert_mmx;
|
| ++ break;
|
| ++ case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| ++ sse2fct=jsimd_extxrgb_gray_convert_sse2;
|
| ++ mmxfct=jsimd_extxrgb_gray_convert_mmx;
|
| ++ break;
|
| ++ default:
|
| ++ sse2fct=jsimd_rgb_gray_convert_sse2;
|
| ++ mmxfct=jsimd_rgb_gray_convert_mmx;
|
| ++ break;
|
| ++ }
|
| ++
|
| ++ if ((simd_support & JSIMD_SSE2) &&
|
| ++ IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
|
| ++ sse2fct(cinfo->image_width, input_buf,
|
| ++ output_buf, output_row, num_rows);
|
| ++ else if (simd_support & JSIMD_MMX)
|
| ++ mmxfct(cinfo->image_width, input_buf,
|
| ++ output_buf, output_row, num_rows);
|
| ++}
|
| +
|
| ++GLOBAL(void)
|
| + jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows)
|
| +@@ -168,6 +252,7 @@
|
| + mmxfct=jsimd_ycc_extrgb_convert_mmx;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_ycc_extrgbx_convert_sse2;
|
| + mmxfct=jsimd_ycc_extrgbx_convert_mmx;
|
| + break;
|
| +@@ -176,14 +261,17 @@
|
| + mmxfct=jsimd_ycc_extbgr_convert_mmx;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_ycc_extbgrx_convert_sse2;
|
| + mmxfct=jsimd_ycc_extbgrx_convert_mmx;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_ycc_extxbgr_convert_sse2;
|
| + mmxfct=jsimd_ycc_extxbgr_convert_mmx;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_ycc_extxrgb_convert_sse2;
|
| + mmxfct=jsimd_ycc_extxrgb_convert_mmx;
|
| + break;
|
| +@@ -202,6 +290,7 @@
|
| + input_row, output_buf, num_rows);
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(int)
|
| + jsimd_can_h2v2_downsample (void)
|
| + {
|
| +@@ -267,6 +356,7 @@
|
| + compptr->v_samp_factor, compptr->width_in_blocks,
|
| + input_data, output_data);
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(int)
|
| + jsimd_can_h2v2_upsample (void)
|
| +@@ -382,7 +472,7 @@
|
| + {
|
| + if ((simd_support & JSIMD_SSE2) &&
|
| + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
|
| +- jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
|
| ++ jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
|
| + compptr->downsampled_width, input_data, output_data_ptr);
|
| + else if (simd_support & JSIMD_MMX)
|
| + jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
|
| +@@ -460,6 +550,7 @@
|
| + mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
|
| + break;
|
| +@@ -468,14 +559,17 @@
|
| + mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
|
| + break;
|
| +@@ -510,6 +604,7 @@
|
| + mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
|
| + break;
|
| +@@ -518,14 +613,17 @@
|
| + mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
|
| + mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
|
| + break;
|
| +@@ -544,6 +642,7 @@
|
| + in_row_group_ctr, output_buf);
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(int)
|
| + jsimd_can_convsamp (void)
|
| + {
|
| +@@ -763,6 +862,7 @@
|
| + else if (simd_support & JSIMD_3DNOW)
|
| + jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(int)
|
| + jsimd_can_idct_2x2 (void)
|
| +@@ -953,4 +1053,3 @@
|
| + jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
|
| + output_buf, output_col);
|
| + }
|
| +-
|
| +Index: simd/jsimd_x86_64.c
|
| +===================================================================
|
| +--- simd/jsimd_x86_64.c (revision 829)
|
| ++++ simd/jsimd_x86_64.c (working copy)
|
| +@@ -2,10 +2,11 @@
|
| + * jsimd_x86_64.c
|
| + *
|
| + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| +- * Copyright 2009 D. R. Commander
|
| ++ * Copyright 2009-2011 D. R. Commander
|
| + *
|
| + * Based on the x86 SIMD extension for IJG JPEG library,
|
| + * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
| ++ * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
| + *
|
| + * This file contains the interface between the "normal" portions
|
| + * of the library and the SIMD implementations when running on a
|
| +@@ -18,16 +19,17 @@
|
| + #include "../jsimd.h"
|
| + #include "../jdct.h"
|
| + #include "../jsimddct.h"
|
| +-#include "simd/jsimd.h"
|
| ++#include "jsimd.h"
|
| +
|
| + /*
|
| + * In the PIC cases, we have no guarantee that constants will keep
|
| + * their alignment. This macro allows us to verify it at runtime.
|
| + */
|
| +-#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0)
|
| ++#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
|
| +
|
| + #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(int)
|
| + jsimd_can_rgb_ycc (void)
|
| + {
|
| +@@ -44,8 +46,26 @@
|
| +
|
| + return 1;
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(int)
|
| ++jsimd_can_rgb_gray (void)
|
| ++{
|
| + /* The code is optimised for these values only */
|
| + if (BITS_IN_JSAMPLE != 8)
|
| + return 0;
|
| + if (sizeof(JDIMENSION) != 4)
|
| + return 0;
|
| ++ if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
| ++ return 0;
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ return 1;
|
| -+
|
| -+ return 0;
|
| -+}
|
| ++ if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
|
| ++ return 0;
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
| -+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| -+ JDIMENSION output_row, int num_rows)
|
| -+{
|
| ++ return 1;
|
| +}
|
| +
|
| -+GLOBAL(void)
|
| ++GLOBAL(int)
|
| + jsimd_can_ycc_rgb (void)
|
| + {
|
| + /* The code is optimised for these values only */
|
| +@@ -62,6 +82,7 @@
|
| + return 1;
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(void)
|
| + jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| +@@ -75,6 +96,7 @@
|
| + sse2fct=jsimd_extrgb_ycc_convert_sse2;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_extrgbx_ycc_convert_sse2;
|
| + break;
|
| + case JCS_EXT_BGR:
|
| +@@ -81,12 +103,15 @@
|
| + sse2fct=jsimd_extbgr_ycc_convert_sse2;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_extbgrx_ycc_convert_sse2;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_extxbgr_ycc_convert_sse2;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_extxrgb_ycc_convert_sse2;
|
| + break;
|
| + default:
|
| +@@ -96,8 +121,48 @@
|
| +
|
| + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(void)
|
| +jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
| + JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
| + JDIMENSION output_row, int num_rows)
|
| +{
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
| -+ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| -+ JSAMPARRAY output_buf, int num_rows)
|
| -+{
|
| -+ void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
|
| ++ void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
| +
|
| -+ switch(cinfo->out_color_space) {
|
| ++ switch(cinfo->in_color_space)
|
| ++ {
|
| + case JCS_EXT_RGB:
|
| -+ neonfct=jsimd_ycc_extrgb_convert_neon;
|
| ++ sse2fct=jsimd_extrgb_gray_convert_sse2;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| + case JCS_EXT_RGBA:
|
| -+ neonfct=jsimd_ycc_extrgbx_convert_neon;
|
| ++ sse2fct=jsimd_extrgbx_gray_convert_sse2;
|
| + break;
|
| + case JCS_EXT_BGR:
|
| -+ neonfct=jsimd_ycc_extbgr_convert_neon;
|
| ++ sse2fct=jsimd_extbgr_gray_convert_sse2;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| + case JCS_EXT_BGRA:
|
| -+ neonfct=jsimd_ycc_extbgrx_convert_neon;
|
| ++ sse2fct=jsimd_extbgrx_gray_convert_sse2;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| + case JCS_EXT_ABGR:
|
| -+ neonfct=jsimd_ycc_extxbgr_convert_neon;
|
| ++ sse2fct=jsimd_extxbgr_gray_convert_sse2;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| + case JCS_EXT_ARGB:
|
| -+ neonfct=jsimd_ycc_extxrgb_convert_neon;
|
| ++ sse2fct=jsimd_extxrgb_gray_convert_sse2;
|
| + break;
|
| + default:
|
| -+ neonfct=jsimd_ycc_extrgb_convert_neon;
|
| ++ sse2fct=jsimd_rgb_gray_convert_sse2;
|
| + break;
|
| + }
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
| -+ JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| -+ JSAMPARRAY output_buf, int num_rows)
|
| -+{
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
|
| -+ output_buf, num_rows);
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v2_downsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v1_downsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JSAMPARRAY input_data, JSAMPARRAY output_data)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JSAMPARRAY input_data, JSAMPARRAY output_data)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v2_upsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v1_upsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
| -+ jpeg_component_info * compptr,
|
| -+ JSAMPARRAY input_data,
|
| -+ JSAMPARRAY * output_data_ptr)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
|
| -+ jpeg_component_info * compptr,
|
| -+ JSAMPARRAY input_data,
|
| -+ JSAMPARRAY * output_data_ptr)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v2_fancy_upsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v1_fancy_upsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
|
| -+ jpeg_component_info * compptr,
|
| -+ JSAMPARRAY input_data,
|
| -+ JSAMPARRAY * output_data_ptr)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
|
| -+ jpeg_component_info * compptr,
|
| -+ JSAMPARRAY input_data,
|
| -+ JSAMPARRAY * output_data_ptr)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v2_merged_upsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_h2v1_merged_upsample (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
|
| -+ JSAMPIMAGE input_buf,
|
| -+ JDIMENSION in_row_group_ctr,
|
| -+ JSAMPARRAY output_buf)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
|
| -+ JSAMPIMAGE input_buf,
|
| -+ JDIMENSION in_row_group_ctr,
|
| -+ JSAMPARRAY output_buf)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_convsamp (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_convsamp_float (void)
|
| -+{
|
| -+ init_simd();
|
| -+
|
| -+ return 0;
|
| -+}
|
| -+
|
| -+GLOBAL(void)
|
| -+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
|
| -+ DCTELEM * workspace)
|
| -+{
|
| ++ sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
|
| +}
|
| +
|
| +GLOBAL(void)
|
| -+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
|
| -+ FAST_FLOAT * workspace)
|
| -+{
|
| -+}
|
| -+
|
| -+GLOBAL(int)
|
| -+jsimd_can_fdct_islow (void)
|
| -+{
|
| -+ init_simd();
|
| + jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
| + JSAMPIMAGE input_buf, JDIMENSION input_row,
|
| + JSAMPARRAY output_buf, int num_rows)
|
| +@@ -110,6 +175,7 @@
|
| + sse2fct=jsimd_ycc_extrgb_convert_sse2;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_ycc_extrgbx_convert_sse2;
|
| + break;
|
| + case JCS_EXT_BGR:
|
| +@@ -116,12 +182,15 @@
|
| + sse2fct=jsimd_ycc_extbgr_convert_sse2;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_ycc_extbgrx_convert_sse2;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_ycc_extxbgr_convert_sse2;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_ycc_extxrgb_convert_sse2;
|
| + break;
|
| + default:
|
| +@@ -132,6 +201,7 @@
|
| + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(int)
|
| + jsimd_can_h2v2_downsample (void)
|
| + {
|
| +@@ -177,6 +247,7 @@
|
| + compptr->width_in_blocks,
|
| + input_data, output_data);
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(int)
|
| + jsimd_can_h2v2_upsample (void)
|
| +@@ -260,7 +331,7 @@
|
| + JSAMPARRAY input_data,
|
| + JSAMPARRAY * output_data_ptr)
|
| + {
|
| +- jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
|
| ++ jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
|
| + compptr->downsampled_width,
|
| + input_data, output_data_ptr);
|
| + }
|
| +@@ -320,6 +391,7 @@
|
| + sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_BGR:
|
| +@@ -326,12 +398,15 @@
|
| + sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
|
| + break;
|
| + default:
|
| +@@ -356,6 +431,7 @@
|
| + sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_RGBX:
|
| ++ case JCS_EXT_RGBA:
|
| + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_BGR:
|
| +@@ -362,12 +438,15 @@
|
| + sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_BGRX:
|
| ++ case JCS_EXT_BGRA:
|
| + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_XBGR:
|
| ++ case JCS_EXT_ABGR:
|
| + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
|
| + break;
|
| + case JCS_EXT_XRGB:
|
| ++ case JCS_EXT_ARGB:
|
| + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
|
| + break;
|
| + default:
|
| +@@ -378,6 +457,7 @@
|
| + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
|
| + }
|
| +
|
| ++#ifndef JPEG_DECODE_ONLY
|
| + GLOBAL(int)
|
| + jsimd_can_convsamp (void)
|
| + {
|
| +@@ -528,6 +608,7 @@
|
| + {
|
| + jsimd_quantize_float_sse2(coef_block, divisors, workspace);
|
| + }
|
| ++#endif
|
| +
|
| + GLOBAL(int)
|
| + jsimd_can_idct_2x2 (void)
|
| +@@ -677,4 +758,3 @@
|
| + jsimd_idct_float_sse2(compptr->dct_table, coef_block,
|
| + output_buf, output_col);
|
| + }
|
| +-
|
| +Index: simd/jsimdcfg.inc.h
|
| +===================================================================
|
| +--- simd/jsimdcfg.inc.h (revision 829)
|
| ++++ simd/jsimdcfg.inc.h (working copy)
|
| +@@ -15,26 +15,54 @@
|
| + #include "../jmorecfg.h"
|
| + #include "jsimd.h"
|
| +
|
| +-#define define(var) %define _cpp_protection_##var
|
| +-#define definev(var) %define _cpp_protection_##var var
|
| +-
|
| + ;
|
| + ; -- jpeglib.h
|
| + ;
|
| +
|
| +-definev(DCTSIZE)
|
| +-definev(DCTSIZE2)
|
| ++%define _cpp_protection_DCTSIZE DCTSIZE
|
| ++%define _cpp_protection_DCTSIZE2 DCTSIZE2
|
| +
|
| + ;
|
| + ; -- jmorecfg.h
|
| + ;
|
| +
|
| +-definev(RGB_RED)
|
| +-definev(RGB_GREEN)
|
| +-definev(RGB_BLUE)
|
| ++%define _cpp_protection_RGB_RED RGB_RED
|
| ++%define _cpp_protection_RGB_GREEN RGB_GREEN
|
| ++%define _cpp_protection_RGB_BLUE RGB_BLUE
|
| ++%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE
|
| +
|
| +-definev(RGB_PIXELSIZE)
|
| ++%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED
|
| ++%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN
|
| ++%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE
|
| ++%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
| +
|
| ++%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED
|
| ++%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN
|
| ++%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE
|
| ++%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE
|
| +
|
| -+ return 0;
|
| -+}
|
| ++%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED
|
| ++%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN
|
| ++%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE
|
| ++%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_fdct_ifast (void)
|
| -+{
|
| -+ init_simd();
|
| ++%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED
|
| ++%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN
|
| ++%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE
|
| ++%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE
|
| +
|
| -+ return 0;
|
| -+}
|
| ++%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED
|
| ++%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN
|
| ++%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE
|
| ++%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_fdct_float (void)
|
| -+{
|
| -+ init_simd();
|
| ++%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED
|
| ++%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN
|
| ++%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE
|
| ++%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
| +
|
| -+ return 0;
|
| -+}
|
| ++%define RGBX_FILLER_0XFF 1
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_fdct_islow (DCTELEM * data)
|
| -+{
|
| -+}
|
| + ; Representation of a single sample (pixel element value).
|
| + ; On this SIMD implementation, this must be 'unsigned char'.
|
| + ;
|
| +@@ -42,7 +70,7 @@
|
| + %define JSAMPLE byte ; unsigned char
|
| + %define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)
|
| +
|
| +-definev(CENTERJSAMPLE)
|
| ++%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE
|
| +
|
| + ; Representation of a DCT frequency coefficient.
|
| + ; On this SIMD implementation, this must be 'short'.
|
| +@@ -95,74 +123,74 @@
|
| + ; -- jsimd.h
|
| + ;
|
| +
|
| +-definev(JSIMD_NONE)
|
| +-definev(JSIMD_MMX)
|
| +-definev(JSIMD_3DNOW)
|
| +-definev(JSIMD_SSE)
|
| +-definev(JSIMD_SSE2)
|
| ++%define _cpp_protection_JSIMD_NONE JSIMD_NONE
|
| ++%define _cpp_protection_JSIMD_MMX JSIMD_MMX
|
| ++%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW
|
| ++%define _cpp_protection_JSIMD_SSE JSIMD_SSE
|
| ++%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2
|
| +
|
| + ; Short forms of external names for systems with brain-damaged linkers.
|
| + ;
|
| + #ifdef NEED_SHORT_EXTERNAL_NAMES
|
| +-definev(jpeg_simd_cpu_support)
|
| +-definev(jsimd_rgb_ycc_convert_mmx)
|
| +-definev(jsimd_ycc_rgb_convert_mmx)
|
| +-definev(jconst_rgb_ycc_convert_sse2)
|
| +-definev(jsimd_rgb_ycc_convert_sse2)
|
| +-definev(jconst_ycc_rgb_convert_sse2)
|
| +-definev(jsimd_ycc_rgb_convert_sse2)
|
| +-definev(jsimd_h2v2_downsample_mmx)
|
| +-definev(jsimd_h2v1_downsample_mmx)
|
| +-definev(jsimd_h2v2_downsample_sse2)
|
| +-definev(jsimd_h2v1_downsample_sse2)
|
| +-definev(jsimd_h2v2_upsample_mmx)
|
| +-definev(jsimd_h2v1_upsample_mmx)
|
| +-definev(jsimd_h2v1_fancy_upsample_mmx)
|
| +-definev(jsimd_h2v2_fancy_upsample_mmx)
|
| +-definev(jsimd_h2v1_merged_upsample_mmx)
|
| +-definev(jsimd_h2v2_merged_upsample_mmx)
|
| +-definev(jsimd_h2v2_upsample_sse2)
|
| +-definev(jsimd_h2v1_upsample_sse2)
|
| +-definev(jconst_fancy_upsample_sse2)
|
| +-definev(jsimd_h2v1_fancy_upsample_sse2)
|
| +-definev(jsimd_h2v2_fancy_upsample_sse2)
|
| +-definev(jconst_merged_upsample_sse2)
|
| +-definev(jsimd_h2v1_merged_upsample_sse2)
|
| +-definev(jsimd_h2v2_merged_upsample_sse2)
|
| +-definev(jsimd_convsamp_mmx)
|
| +-definev(jsimd_convsamp_sse2)
|
| +-definev(jsimd_convsamp_float_3dnow)
|
| +-definev(jsimd_convsamp_float_sse)
|
| +-definev(jsimd_convsamp_float_sse2)
|
| +-definev(jsimd_fdct_islow_mmx)
|
| +-definev(jsimd_fdct_ifast_mmx)
|
| +-definev(jconst_fdct_islow_sse2)
|
| +-definev(jsimd_fdct_islow_sse2)
|
| +-definev(jconst_fdct_ifast_sse2)
|
| +-definev(jsimd_fdct_ifast_sse2)
|
| +-definev(jsimd_fdct_float_3dnow)
|
| +-definev(jconst_fdct_float_sse)
|
| +-definev(jsimd_fdct_float_sse)
|
| +-definev(jsimd_quantize_mmx)
|
| +-definev(jsimd_quantize_sse2)
|
| +-definev(jsimd_quantize_float_3dnow)
|
| +-definev(jsimd_quantize_float_sse)
|
| +-definev(jsimd_quantize_float_sse2)
|
| +-definev(jsimd_idct_2x2_mmx)
|
| +-definev(jsimd_idct_4x4_mmx)
|
| +-definev(jconst_idct_red_sse2)
|
| +-definev(jsimd_idct_2x2_sse2)
|
| +-definev(jsimd_idct_4x4_sse2)
|
| +-definev(jsimd_idct_islow_mmx)
|
| +-definev(jsimd_idct_ifast_mmx)
|
| +-definev(jconst_idct_islow_sse2)
|
| +-definev(jsimd_idct_islow_sse2)
|
| +-definev(jconst_idct_ifast_sse2)
|
| +-definev(jsimd_idct_ifast_sse2)
|
| +-definev(jsimd_idct_float_3dnow)
|
| +-definev(jconst_idct_float_sse)
|
| +-definev(jsimd_idct_float_sse)
|
| +-definev(jconst_idct_float_sse2)
|
| +-definev(jsimd_idct_float_sse2)
|
| ++%define _cpp_protection_jpeg_simd_cpu_support jpeg_simd_cpu_support
|
| ++%define _cpp_protection_jsimd_rgb_ycc_convert_mmx jsimd_rgb_ycc_convert_mmx
|
| ++%define _cpp_protection_jsimd_ycc_rgb_convert_mmx jsimd_ycc_rgb_convert_mmx
|
| ++%define _cpp_protection_jconst_rgb_ycc_convert_sse2 jconst_rgb_ycc_convert_sse2
|
| ++%define _cpp_protection_jsimd_rgb_ycc_convert_sse2 jsimd_rgb_ycc_convert_sse2
|
| ++%define _cpp_protection_jconst_ycc_rgb_convert_sse2 jconst_ycc_rgb_convert_sse2
|
| ++%define _cpp_protection_jsimd_ycc_rgb_convert_sse2 jsimd_ycc_rgb_convert_sse2
|
| ++%define _cpp_protection_jsimd_h2v2_downsample_mmx jsimd_h2v2_downsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v1_downsample_mmx jsimd_h2v1_downsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v2_downsample_sse2 jsimd_h2v2_downsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v1_downsample_sse2 jsimd_h2v1_downsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v2_upsample_mmx jsimd_h2v2_upsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v1_upsample_mmx jsimd_h2v1_upsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v1_fancy_upsample_mmx jsimd_h2v1_fancy_upsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v2_fancy_upsample_mmx jsimd_h2v2_fancy_upsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_merged_upsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_merged_upsample_mmx
|
| ++%define _cpp_protection_jsimd_h2v2_upsample_sse2 jsimd_h2v2_upsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v1_upsample_sse2 jsimd_h2v1_upsample_sse2
|
| ++%define _cpp_protection_jconst_fancy_upsample_sse2 jconst_fancy_upsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v1_fancy_upsample_sse2 jsimd_h2v1_fancy_upsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v2_fancy_upsample_sse2 jsimd_h2v2_fancy_upsample_sse2
|
| ++%define _cpp_protection_jconst_merged_upsample_sse2 jconst_merged_upsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_merged_upsample_sse2
|
| ++%define _cpp_protection_jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_merged_upsample_sse2
|
| ++%define _cpp_protection_jsimd_convsamp_mmx jsimd_convsamp_mmx
|
| ++%define _cpp_protection_jsimd_convsamp_sse2 jsimd_convsamp_sse2
|
| ++%define _cpp_protection_jsimd_convsamp_float_3dnow jsimd_convsamp_float_3dnow
|
| ++%define _cpp_protection_jsimd_convsamp_float_sse jsimd_convsamp_float_sse
|
| ++%define _cpp_protection_jsimd_convsamp_float_sse2 jsimd_convsamp_float_sse2
|
| ++%define _cpp_protection_jsimd_fdct_islow_mmx jsimd_fdct_islow_mmx
|
| ++%define _cpp_protection_jsimd_fdct_ifast_mmx jsimd_fdct_ifast_mmx
|
| ++%define _cpp_protection_jconst_fdct_islow_sse2 jconst_fdct_islow_sse2
|
| ++%define _cpp_protection_jsimd_fdct_islow_sse2 jsimd_fdct_islow_sse2
|
| ++%define _cpp_protection_jconst_fdct_ifast_sse2 jconst_fdct_ifast_sse2
|
| ++%define _cpp_protection_jsimd_fdct_ifast_sse2 jsimd_fdct_ifast_sse2
|
| ++%define _cpp_protection_jsimd_fdct_float_3dnow jsimd_fdct_float_3dnow
|
| ++%define _cpp_protection_jconst_fdct_float_sse jconst_fdct_float_sse
|
| ++%define _cpp_protection_jsimd_fdct_float_sse jsimd_fdct_float_sse
|
| ++%define _cpp_protection_jsimd_quantize_mmx jsimd_quantize_mmx
|
| ++%define _cpp_protection_jsimd_quantize_sse2 jsimd_quantize_sse2
|
| ++%define _cpp_protection_jsimd_quantize_float_3dnow jsimd_quantize_float_3dnow
|
| ++%define _cpp_protection_jsimd_quantize_float_sse jsimd_quantize_float_sse
|
| ++%define _cpp_protection_jsimd_quantize_float_sse2 jsimd_quantize_float_sse2
|
| ++%define _cpp_protection_jsimd_idct_2x2_mmx jsimd_idct_2x2_mmx
|
| ++%define _cpp_protection_jsimd_idct_4x4_mmx jsimd_idct_4x4_mmx
|
| ++%define _cpp_protection_jconst_idct_red_sse2 jconst_idct_red_sse2
|
| ++%define _cpp_protection_jsimd_idct_2x2_sse2 jsimd_idct_2x2_sse2
|
| ++%define _cpp_protection_jsimd_idct_4x4_sse2 jsimd_idct_4x4_sse2
|
| ++%define _cpp_protection_jsimd_idct_islow_mmx jsimd_idct_islow_mmx
|
| ++%define _cpp_protection_jsimd_idct_ifast_mmx jsimd_idct_ifast_mmx
|
| ++%define _cpp_protection_jconst_idct_islow_sse2 jconst_idct_islow_sse2
|
| ++%define _cpp_protection_jsimd_idct_islow_sse2 jsimd_idct_islow_sse2
|
| ++%define _cpp_protection_jconst_idct_ifast_sse2 jconst_idct_ifast_sse2
|
| ++%define _cpp_protection_jsimd_idct_ifast_sse2 jsimd_idct_ifast_sse2
|
| ++%define _cpp_protection_jsimd_idct_float_3dnow jsimd_idct_float_3dnow
|
| ++%define _cpp_protection_jconst_idct_float_sse jconst_idct_float_sse
|
| ++%define _cpp_protection_jsimd_idct_float_sse jsimd_idct_float_sse
|
| ++%define _cpp_protection_jconst_idct_float_sse2 jconst_idct_float_sse2
|
| ++%define _cpp_protection_jsimd_idct_float_sse2 jsimd_idct_float_sse2
|
| + #endif /* NEED_SHORT_EXTERNAL_NAMES */
|
| +
|
| +Index: simd/jsimdcpu.asm
|
| +===================================================================
|
| +--- simd/jsimdcpu.asm (revision 829)
|
| ++++ simd/jsimdcpu.asm (working copy)
|
| +@@ -29,7 +29,7 @@
|
| + ;
|
| +
|
| + align 16
|
| +- global EXTN(jpeg_simd_cpu_support)
|
| ++ global EXTN(jpeg_simd_cpu_support) PRIVATE
|
| +
|
| + EXTN(jpeg_simd_cpu_support):
|
| + push ebx
|
| +@@ -100,3 +100,6 @@
|
| + pop ebx
|
| + ret
|
| +
|
| ++; For some reason, the OS X linker does not honor the request to align the
|
| ++; segment unless we do this.
|
| ++ align 16
|
| +Index: simd/jsimdext.inc
|
| +===================================================================
|
| +--- simd/jsimdext.inc (revision 829)
|
| ++++ simd/jsimdext.inc (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + ; jsimdext.inc - common declarations
|
| + ;
|
| + ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
| ++; Copyright 2010 D. R. Commander
|
| + ;
|
| + ; Based on
|
| + ; x86 SIMD extension for IJG JPEG library - version 1.02
|
| +@@ -37,9 +38,28 @@
|
| +
|
| + ; -- segment definition --
|
| + ;
|
| ++%ifdef __YASM_VER__
|
| ++%define SEG_TEXT .text align=16
|
| ++%define SEG_CONST .rdata align=16
|
| ++%else
|
| + %define SEG_TEXT .text align=16 public use32 class=CODE
|
| + %define SEG_CONST .rdata align=16 public use32 class=CONST
|
| ++%endif
|
| +
|
| ++%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)--------
|
| ++; * Microsoft Visual C++
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_fdct_ifast (DCTELEM * data)
|
| -+{
|
| -+}
|
| ++; -- segment definition --
|
| ++;
|
| ++%ifdef __YASM_VER__
|
| ++%define SEG_TEXT .text align=16
|
| ++%define SEG_CONST .rdata align=16
|
| ++%else
|
| ++%define SEG_TEXT .text align=16 public use64 class=CODE
|
| ++%define SEG_CONST .rdata align=16 public use64 class=CONST
|
| ++%endif
|
| ++%define EXTN(name) name ; foo() -> foo
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_fdct_float (FAST_FLOAT * data)
|
| -+{
|
| -+}
|
| + %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)----------
|
| + ; * Borland C++ (Win32)
|
| +
|
| +@@ -53,6 +73,12 @@
|
| + ; * *BSD family Unix using elf format
|
| + ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
|
| +
|
| ++; PIC is the default on Linux
|
| ++%define PIC
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_quantize (void)
|
| -+{
|
| -+ init_simd();
|
| ++; mark stack as non-executable
|
| ++section .note.GNU-stack noalloc noexec nowrite progbits
|
| +
|
| -+ return 0;
|
| -+}
|
| + ; -- segment definition --
|
| + ;
|
| + %ifdef __x86_64__
|
| +@@ -280,7 +306,44 @@
|
| + %endmacro
|
| +
|
| + %ifdef __x86_64__
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_quantize_float (void)
|
| -+{
|
| -+ init_simd();
|
| ++%ifdef WIN64
|
| +
|
| -+ return 0;
|
| -+}
|
| + %imacro collect_args 0
|
| ++ push r12
|
| ++ push r13
|
| ++ push r14
|
| ++ push r15
|
| ++ mov r10, rcx
|
| ++ mov r11, rdx
|
| ++ mov r12, r8
|
| ++ mov r13, r9
|
| ++ mov r14, [rax+48]
|
| ++ mov r15, [rax+56]
|
| ++ push rsi
|
| ++ push rdi
|
| ++ sub rsp, SIZEOF_XMMWORD
|
| ++ movaps XMMWORD [rsp], xmm6
|
| ++ sub rsp, SIZEOF_XMMWORD
|
| ++ movaps XMMWORD [rsp], xmm7
|
| ++%endmacro
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
|
| -+ DCTELEM * workspace)
|
| -+{
|
| -+}
|
| ++%imacro uncollect_args 0
|
| ++ movaps xmm7, XMMWORD [rsp]
|
| ++ add rsp, SIZEOF_XMMWORD
|
| ++ movaps xmm6, XMMWORD [rsp]
|
| ++ add rsp, SIZEOF_XMMWORD
|
| ++ pop rdi
|
| ++ pop rsi
|
| ++ pop r15
|
| ++ pop r14
|
| ++ pop r13
|
| ++ pop r12
|
| ++%endmacro
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
|
| -+ FAST_FLOAT * workspace)
|
| -+{
|
| -+}
|
| ++%else
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_idct_2x2 (void)
|
| -+{
|
| -+ init_simd();
|
| ++%imacro collect_args 0
|
| + push r10
|
| + push r11
|
| + push r12
|
| +@@ -306,9 +369,21 @@
|
| +
|
| + %endif
|
| +
|
| ++%endif
|
| +
|
| -+ /* The code is optimised for these values only */
|
| -+ if (DCTSIZE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JCOEF) != 2)
|
| -+ return 0;
|
| -+ if (BITS_IN_JSAMPLE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JDIMENSION) != 4)
|
| -+ return 0;
|
| -+ if (sizeof(ISLOW_MULT_TYPE) != 2)
|
| -+ return 0;
|
| + ; --------------------------------------------------------------------------
|
| + ; Defines picked up from the C headers
|
| + ;
|
| + %include "jsimdcfg.inc"
|
| +
|
| ++; Begin chromium edits
|
| ++%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
|
| ++%define PRIVATE :private_extern
|
| ++%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
|
| ++%define PRIVATE :hidden
|
| ++%else
|
| ++%define PRIVATE
|
| ++%endif
|
| ++; End chromium edits
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ return 1;
|
| + ; --------------------------------------------------------------------------
|
| +Index: turbojpeg.h
|
| +===================================================================
|
| +--- turbojpeg.h (revision 829)
|
| ++++ turbojpeg.h (working copy)
|
| +@@ -1,231 +1,932 @@
|
| +-/* Copyright (C)2004 Landmark Graphics Corporation
|
| +- * Copyright (C)2005, 2006 Sun Microsystems, Inc.
|
| +- * Copyright (C)2009 D. R. Commander
|
| ++/*
|
| ++ * Copyright (C)2009-2013 D. R. Commander. All Rights Reserved.
|
| + *
|
| +- * This library is free software and may be redistributed and/or modified under
|
| +- * the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
| +- * any later version. The full license is in the LICENSE.txt file included
|
| +- * with this distribution.
|
| ++ * Redistribution and use in source and binary forms, with or without
|
| ++ * modification, are permitted provided that the following conditions are met:
|
| + *
|
| +- * This library is distributed in the hope that it will be useful,
|
| +- * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| +- * wxWindows Library License for more details.
|
| ++ * - Redistributions of source code must retain the above copyright notice,
|
| ++ * this list of conditions and the following disclaimer.
|
| ++ * - Redistributions in binary form must reproduce the above copyright notice,
|
| ++ * this list of conditions and the following disclaimer in the documentation
|
| ++ * and/or other materials provided with the distribution.
|
| ++ * - Neither the name of the libjpeg-turbo Project nor the names of its
|
| ++ * contributors may be used to endorse or promote products derived from this
|
| ++ * software without specific prior written permission.
|
| ++ *
|
| ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
|
| ++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
| ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
| ++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| ++ * POSSIBILITY OF SUCH DAMAGE.
|
| + */
|
| +
|
| +-#if (defined(_MSC_VER) || defined(__CYGWIN__) || defined(__MINGW32__)) && defined(_WIN32) && defined(DLLDEFINE)
|
| ++#ifndef __TURBOJPEG_H__
|
| ++#define __TURBOJPEG_H__
|
| +
|
| -+ return 0;
|
| -+}
|
| ++#if defined(_WIN32) && defined(DLLDEFINE)
|
| + #define DLLEXPORT __declspec(dllexport)
|
| + #else
|
| + #define DLLEXPORT
|
| + #endif
|
| +-
|
| + #define DLLCALL
|
| +
|
| +-/* Subsampling */
|
| +-#define NUMSUBOPT 4
|
| +
|
| +-enum {TJ_444=0, TJ_422, TJ_420, TJ_GRAYSCALE};
|
| ++/**
|
| ++ * @addtogroup TurboJPEG
|
| ++ * TurboJPEG API. This API provides an interface for generating, decoding, and
|
| ++ * transforming planar YUV and JPEG images in memory.
|
| ++ *
|
| ++ * @{
|
| ++ */
|
| +
|
| +-/* Flags */
|
| +-#define TJ_BGR 1
|
| +-#define TJ_BOTTOMUP 2
|
| +-#define TJ_FORCEMMX 8 /* Force IPP to use MMX code even if SSE available */
|
| +-#define TJ_FORCESSE 16 /* Force IPP to use SSE1 code even if SSE2 available */
|
| +-#define TJ_FORCESSE2 32 /* Force IPP to use SSE2 code (useful if auto-detect is not working properly) */
|
| +-#define TJ_ALPHAFIRST 64 /* BGR buffer is ABGR and RGB buffer is ARGB */
|
| +-#define TJ_FORCESSE3 128 /* Force IPP to use SSE3 code (useful if auto-detect is not working properly) */
|
| +-#define TJ_FASTUPSAMPLE 256 /* Use fast, inaccurate 4:2:2 and 4:2:0 YUV upsampling routines in libjpeg decompressor */
|
| +
|
| ++/**
|
| ++ * The number of chrominance subsampling options
|
| ++ */
|
| ++#define TJ_NUMSAMP 5
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_idct_4x4 (void)
|
| ++/**
|
| ++ * Chrominance subsampling options.
|
| ++ * When an image is converted from the RGB to the YCbCr colorspace as part of
|
| ++ * the JPEG compression process, some of the Cb and Cr (chrominance) components
|
| ++ * can be discarded or averaged together to produce a smaller image with little
|
| ++ * perceptible loss of image clarity (the human eye is more sensitive to small
|
| ++ * changes in brightness than small changes in color.) This is called
|
| ++ * "chrominance subsampling".
|
| ++ * <p>
|
| ++ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
|
| ++ * convention of the digital video community, the TurboJPEG API uses "YUV" to
|
| ++ * refer to an image format consisting of Y, Cb, and Cr image planes.
|
| ++ */
|
| ++enum TJSAMP
|
| +{
|
| -+ init_simd();
|
| ++ /**
|
| ++ * 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG or
|
| ++ * YUV image will contain one chrominance component for every pixel in the
|
| ++ * source image.
|
| ++ */
|
| ++ TJSAMP_444=0,
|
| ++ /**
|
| ++ * 4:2:2 chrominance subsampling. The JPEG or YUV image will contain one
|
| ++ * chrominance component for every 2x1 block of pixels in the source image.
|
| ++ */
|
| ++ TJSAMP_422,
|
| ++ /**
|
| ++ * 4:2:0 chrominance subsampling. The JPEG or YUV image will contain one
|
| ++ * chrominance component for every 2x2 block of pixels in the source image.
|
| ++ */
|
| ++ TJSAMP_420,
|
| ++ /**
|
| ++ * Grayscale. The JPEG or YUV image will contain no chrominance components.
|
| ++ */
|
| ++ TJSAMP_GRAY,
|
| ++ /**
|
| ++ * 4:4:0 chrominance subsampling. The JPEG or YUV image will contain one
|
| ++ * chrominance component for every 1x2 block of pixels in the source image.
|
| ++ * Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
|
| ++ */
|
| ++ TJSAMP_440
|
| ++};
|
| +
|
| -+ /* The code is optimised for these values only */
|
| -+ if (DCTSIZE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JCOEF) != 2)
|
| -+ return 0;
|
| -+ if (BITS_IN_JSAMPLE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JDIMENSION) != 4)
|
| -+ return 0;
|
| -+ if (sizeof(ISLOW_MULT_TYPE) != 2)
|
| -+ return 0;
|
| ++/**
|
| ++ * MCU block width (in pixels) for a given level of chrominance subsampling.
|
| ++ * MCU block sizes:
|
| ++ * - 8x8 for no subsampling or grayscale
|
| ++ * - 16x8 for 4:2:2
|
| ++ * - 8x16 for 4:4:0
|
| ++ * - 16x16 for 4:2:0
|
| ++ */
|
| ++static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8};
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ return 1;
|
| ++/**
|
| ++ * MCU block height (in pixels) for a given level of chrominance subsampling.
|
| ++ * MCU block sizes:
|
| ++ * - 8x8 for no subsampling or grayscale
|
| ++ * - 16x8 for 4:2:2
|
| ++ * - 8x16 for 4:4:0
|
| ++ * - 16x16 for 4:2:0
|
| ++ */
|
| ++static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16};
|
| +
|
| -+ return 0;
|
| -+}
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| -+ JDIMENSION output_col)
|
| -+{
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
|
| -+ output_col);
|
| -+}
|
| ++/**
|
| ++ * The number of pixel formats
|
| ++ */
|
| ++#define TJ_NUMPF 11
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| -+ JDIMENSION output_col)
|
| ++/**
|
| ++ * Pixel formats
|
| ++ */
|
| ++enum TJPF
|
| +{
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
|
| -+ output_col);
|
| -+}
|
| ++ /**
|
| ++ * RGB pixel format. The red, green, and blue components in the image are
|
| ++ * stored in 3-byte pixels in the order R, G, B from lowest to highest byte
|
| ++ * address within each pixel.
|
| ++ */
|
| ++ TJPF_RGB=0,
|
| ++ /**
|
| ++ * BGR pixel format. The red, green, and blue components in the image are
|
| ++ * stored in 3-byte pixels in the order B, G, R from lowest to highest byte
|
| ++ * address within each pixel.
|
| ++ */
|
| ++ TJPF_BGR,
|
| ++ /**
|
| ++ * RGBX pixel format. The red, green, and blue components in the image are
|
| ++ * stored in 4-byte pixels in the order R, G, B from lowest to highest byte
|
| ++ * address within each pixel. The X component is ignored when compressing
|
| ++ * and undefined when decompressing.
|
| ++ */
|
| ++ TJPF_RGBX,
|
| ++ /**
|
| ++ * BGRX pixel format. The red, green, and blue components in the image are
|
| ++ * stored in 4-byte pixels in the order B, G, R from lowest to highest byte
|
| ++ * address within each pixel. The X component is ignored when compressing
|
| ++ * and undefined when decompressing.
|
| ++ */
|
| ++ TJPF_BGRX,
|
| ++ /**
|
| ++ * XBGR pixel format. The red, green, and blue components in the image are
|
| ++ * stored in 4-byte pixels in the order R, G, B from highest to lowest byte
|
| ++ * address within each pixel. The X component is ignored when compressing
|
| ++ * and undefined when decompressing.
|
| ++ */
|
| ++ TJPF_XBGR,
|
| ++ /**
|
| ++ * XRGB pixel format. The red, green, and blue components in the image are
|
| ++ * stored in 4-byte pixels in the order B, G, R from highest to lowest byte
|
| ++ * address within each pixel. The X component is ignored when compressing
|
| ++ * and undefined when decompressing.
|
| ++ */
|
| ++ TJPF_XRGB,
|
| ++ /**
|
| ++ * Grayscale pixel format. Each 1-byte pixel represents a luminance
|
| ++ * (brightness) level from 0 to 255.
|
| ++ */
|
| ++ TJPF_GRAY,
|
| ++ /**
|
| ++ * RGBA pixel format. This is the same as @ref TJPF_RGBX, except that when
|
| ++ * decompressing, the X component is guaranteed to be 0xFF, which can be
|
| ++ * interpreted as an opaque alpha channel.
|
| ++ */
|
| ++ TJPF_RGBA,
|
| ++ /**
|
| ++ * BGRA pixel format. This is the same as @ref TJPF_BGRX, except that when
|
| ++ * decompressing, the X component is guaranteed to be 0xFF, which can be
|
| ++ * interpreted as an opaque alpha channel.
|
| ++ */
|
| ++ TJPF_BGRA,
|
| ++ /**
|
| ++ * ABGR pixel format. This is the same as @ref TJPF_XBGR, except that when
|
| ++ * decompressing, the X component is guaranteed to be 0xFF, which can be
|
| ++ * interpreted as an opaque alpha channel.
|
| ++ */
|
| ++ TJPF_ABGR,
|
| ++ /**
|
| ++ * ARGB pixel format. This is the same as @ref TJPF_XRGB, except that when
|
| ++ * decompressing, the X component is guaranteed to be 0xFF, which can be
|
| ++ * interpreted as an opaque alpha channel.
|
| ++ */
|
| ++ TJPF_ARGB
|
| ++};
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_idct_islow (void)
|
| -+{
|
| -+ init_simd();
|
| ++/**
|
| ++ * Red offset (in bytes) for a given pixel format. This specifies the number
|
| ++ * of bytes that the red component is offset from the start of the pixel. For
|
| ++ * instance, if a pixel of format TJPF_BGRX is stored in <tt>char pixel[]</tt>,
|
| ++ * then the red component will be <tt>pixel[tjRedOffset[TJPF_BGRX]]</tt>.
|
| ++ */
|
| ++static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1};
|
| ++/**
|
| ++ * Green offset (in bytes) for a given pixel format. This specifies the number
|
| ++ * of bytes that the green component is offset from the start of the pixel.
|
| ++ * For instance, if a pixel of format TJPF_BGRX is stored in
|
| ++ * <tt>char pixel[]</tt>, then the green component will be
|
| ++ * <tt>pixel[tjGreenOffset[TJPF_BGRX]]</tt>.
|
| ++ */
|
| ++static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2};
|
| ++/**
|
| ++ * Blue offset (in bytes) for a given pixel format. This specifies the number
|
| ++ * of bytes that the blue component is offset from the start of the pixel. For
|
| ++ * instance, if a pixel of format TJPF_BGRX is stored in <tt>char pixel[]</tt>,
|
| ++ * then the blue component will be <tt>pixel[tjBlueOffset[TJPF_BGRX]]</tt>.
|
| ++ */
|
| ++static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3};
|
| +
|
| -+ /* The code is optimised for these values only */
|
| -+ if (DCTSIZE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JCOEF) != 2)
|
| -+ return 0;
|
| -+ if (BITS_IN_JSAMPLE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JDIMENSION) != 4)
|
| -+ return 0;
|
| -+ if (sizeof(ISLOW_MULT_TYPE) != 2)
|
| -+ return 0;
|
| ++/**
|
| ++ * Pixel size (in bytes) for a given pixel format.
|
| ++ */
|
| ++static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4};
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ return 1;
|
| +
|
| -+ return 0;
|
| -+}
|
| ++/**
|
| ++ * The uncompressed source/destination image is stored in bottom-up (Windows,
|
| ++ * OpenGL) order, not top-down (X11) order.
|
| ++ */
|
| ++#define TJFLAG_BOTTOMUP 2
|
| ++/**
|
| ++ * Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the
|
| ++ * underlying codec supports it.)
|
| ++ */
|
| ++#define TJFLAG_FORCEMMX 8
|
| ++/**
|
| ++ * Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the
|
| ++ * underlying codec supports it.)
|
| ++ */
|
| ++#define TJFLAG_FORCESSE 16
|
| ++/**
|
| ++ * Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the
|
| ++ * underlying codec supports it.)
|
| ++ */
|
| ++#define TJFLAG_FORCESSE2 32
|
| ++/**
|
| ++ * Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the
|
| ++ * underlying codec supports it.)
|
| ++ */
|
| ++#define TJFLAG_FORCESSE3 128
|
| ++/**
|
| ++ * When decompressing an image that was compressed using chrominance
|
| ++ * subsampling, use the fastest chrominance upsampling algorithm available in
|
| ++ * the underlying codec. The default is to use smooth upsampling, which
|
| ++ * creates a smooth transition between neighboring chrominance components in
|
| ++ * order to reduce upsampling artifacts in the decompressed image.
|
| ++ */
|
| ++#define TJFLAG_FASTUPSAMPLE 256
|
| ++/**
|
| ++ * Disable buffer (re)allocation. If passed to #tjCompress2() or
|
| ++ * #tjTransform(), this flag will cause those functions to generate an error if
|
| ++ * the JPEG image buffer is invalid or too small rather than attempting to
|
| ++ * allocate or reallocate that buffer. This reproduces the behavior of earlier
|
| ++ * versions of TurboJPEG.
|
| ++ */
|
| ++#define TJFLAG_NOREALLOC 1024
|
| ++/**
|
| ++ * Use the fastest DCT/IDCT algorithm available in the underlying codec. The
|
| ++ * default if this flag is not specified is implementation-specific. For
|
| ++ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
|
| ++ * algorithm by default when compressing, because this has been shown to have
|
| ++ * only a very slight effect on accuracy, but it uses the accurate algorithm
|
| ++ * when decompressing, because this has been shown to have a larger effect.
|
| ++ */
|
| ++#define TJFLAG_FASTDCT 2048
|
| ++/**
|
| ++ * Use the most accurate DCT/IDCT algorithm available in the underlying codec.
|
| ++ * The default if this flag is not specified is implementation-specific. For
|
| ++ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
|
| ++ * algorithm by default when compressing, because this has been shown to have
|
| ++ * only a very slight effect on accuracy, but it uses the accurate algorithm
|
| ++ * when decompressing, because this has been shown to have a larger effect.
|
| ++ */
|
| ++#define TJFLAG_ACCURATEDCT 4096
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_idct_ifast (void)
|
| -+{
|
| -+ init_simd();
|
| +
|
| -+ /* The code is optimised for these values only */
|
| -+ if (DCTSIZE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JCOEF) != 2)
|
| -+ return 0;
|
| -+ if (BITS_IN_JSAMPLE != 8)
|
| -+ return 0;
|
| -+ if (sizeof(JDIMENSION) != 4)
|
| -+ return 0;
|
| -+ if (sizeof(IFAST_MULT_TYPE) != 2)
|
| -+ return 0;
|
| -+ if (IFAST_SCALE_BITS != 2)
|
| -+ return 0;
|
| ++/**
|
| ++ * The number of transform operations
|
| ++ */
|
| ++#define TJ_NUMXOP 8
|
| +
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ return 1;
|
| ++/**
|
| ++ * Transform operations for #tjTransform()
|
| ++ */
|
| ++enum TJXOP
|
| ++{
|
| ++ /**
|
| ++ * Do not transform the position of the image pixels
|
| ++ */
|
| ++ TJXOP_NONE=0,
|
| ++ /**
|
| ++ * Flip (mirror) image horizontally. This transform is imperfect if there
|
| ++ * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.)
|
| ++ */
|
| ++ TJXOP_HFLIP,
|
| ++ /**
|
| ++ * Flip (mirror) image vertically. This transform is imperfect if there are
|
| ++ * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.)
|
| ++ */
|
| ++ TJXOP_VFLIP,
|
| ++ /**
|
| ++ * Transpose image (flip/mirror along upper left to lower right axis.) This
|
| ++ * transform is always perfect.
|
| ++ */
|
| ++ TJXOP_TRANSPOSE,
|
| ++ /**
|
| ++ * Transverse transpose image (flip/mirror along upper right to lower left
|
| ++ * axis.) This transform is imperfect if there are any partial MCU blocks in
|
| ++ * the image (see #TJXOPT_PERFECT.)
|
| ++ */
|
| ++ TJXOP_TRANSVERSE,
|
| ++ /**
|
| ++ * Rotate image clockwise by 90 degrees. This transform is imperfect if
|
| ++ * there are any partial MCU blocks on the bottom edge (see
|
| ++ * #TJXOPT_PERFECT.)
|
| ++ */
|
| ++ TJXOP_ROT90,
|
| ++ /**
|
| ++ * Rotate image 180 degrees. This transform is imperfect if there are any
|
| ++ * partial MCU blocks in the image (see #TJXOPT_PERFECT.)
|
| ++ */
|
| ++ TJXOP_ROT180,
|
| ++ /**
|
| ++ * Rotate image counter-clockwise by 90 degrees. This transform is imperfect
|
| ++ * if there are any partial MCU blocks on the right edge (see
|
| ++ * #TJXOPT_PERFECT.)
|
| ++ */
|
| ++ TJXOP_ROT270
|
| ++};
|
| +
|
| -+ return 0;
|
| -+}
|
| +
|
| -+GLOBAL(int)
|
| -+jsimd_can_idct_float (void)
|
| -+{
|
| -+ init_simd();
|
| ++/**
|
| ++ * This option will cause #tjTransform() to return an error if the transform is
|
| ++ * not perfect. Lossless transforms operate on MCU blocks, whose size depends
|
| ++ * on the level of chrominance subsampling used (see #tjMCUWidth
|
| ++ * and #tjMCUHeight.) If the image's width or height is not evenly divisible
|
| ++ * by the MCU block size, then there will be partial MCU blocks on the right
|
| ++ * and/or bottom edges. It is not possible to move these partial MCU blocks to
|
| ++ * the top or left of the image, so any transform that would require that is
|
| ++ * "imperfect." If this option is not specified, then any partial MCU blocks
|
| ++ * that cannot be transformed will be left in place, which will create
|
| ++ * odd-looking strips on the right or bottom edge of the image.
|
| ++ */
|
| ++#define TJXOPT_PERFECT 1
|
| ++/**
|
| ++ * This option will cause #tjTransform() to discard any partial MCU blocks that
|
| ++ * cannot be transformed.
|
| ++ */
|
| ++#define TJXOPT_TRIM 2
|
| ++/**
|
| ++ * This option will enable lossless cropping. See #tjTransform() for more
|
| ++ * information.
|
| ++ */
|
| ++#define TJXOPT_CROP 4
|
| ++/**
|
| ++ * This option will discard the color data in the input image and produce
|
| ++ * a grayscale output image.
|
| ++ */
|
| ++#define TJXOPT_GRAY 8
|
| ++/**
|
| ++ * This option will prevent #tjTransform() from outputting a JPEG image for
|
| ++ * this particular transform (this can be used in conjunction with a custom
|
| ++ * filter to capture the transformed DCT coefficients without transcoding
|
| ++ * them.)
|
| ++ */
|
| ++#define TJXOPT_NOOUTPUT 16
|
| +
|
| -+ return 0;
|
| -+}
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| -+ JDIMENSION output_col)
|
| ++/**
|
| ++ * Scaling factor
|
| ++ */
|
| ++typedef struct
|
| +{
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
|
| -+ output_col);
|
| -+}
|
| ++ /**
|
| ++ * Numerator
|
| ++ */
|
| ++ int num;
|
| ++ /**
|
| ++ * Denominator
|
| ++ */
|
| ++ int denom;
|
| ++} tjscalingfactor;
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| -+ JDIMENSION output_col)
|
| ++/**
|
| ++ * Cropping region
|
| ++ */
|
| ++typedef struct
|
| +{
|
| -+ if (simd_support & JSIMD_ARM_NEON)
|
| -+ jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
|
| -+ output_col);
|
| -+}
|
| ++ /**
|
| ++ * The left boundary of the cropping region. This must be evenly divisible
|
| ++ * by the MCU block width (see #tjMCUWidth.)
|
| ++ */
|
| ++ int x;
|
| ++ /**
|
| ++ * The upper boundary of the cropping region. This must be evenly divisible
|
| ++ * by the MCU block height (see #tjMCUHeight.)
|
| ++ */
|
| ++ int y;
|
| ++ /**
|
| ++ * The width of the cropping region. Setting this to 0 is the equivalent of
|
| ++ * setting it to the width of the source JPEG image - x.
|
| ++ */
|
| ++ int w;
|
| ++ /**
|
| ++ * The height of the cropping region. Setting this to 0 is the equivalent of
|
| ++ * setting it to the height of the source JPEG image - y.
|
| ++ */
|
| ++ int h;
|
| ++} tjregion;
|
| +
|
| -+GLOBAL(void)
|
| -+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
| -+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
| -+ JDIMENSION output_col)
|
| ++/**
|
| ++ * Lossless transform
|
| ++ */
|
| ++typedef struct tjtransform
|
| +{
|
| -+}
|
| -Index: simd/jsimd_arm64_neon.S
|
| -new file mode 100644
|
| -===================================================================
|
| ---- /dev/null
|
| -+++ simd/jsimd_arm64_neon.S
|
| -@@ -0,0 +1,1861 @@
|
| -+/*
|
| -+ * ARMv8 NEON optimizations for libjpeg-turbo
|
| ++ /**
|
| ++ * Cropping region
|
| ++ */
|
| ++ tjregion r;
|
| ++ /**
|
| ++ * One of the @ref TJXOP "transform operations"
|
| ++ */
|
| ++ int op;
|
| ++ /**
|
| ++ * The bitwise OR of one or more of the @ref TJXOPT_CROP "transform options"
|
| ++ */
|
| ++ int options;
|
| ++ /**
|
| ++ * Arbitrary data that can be accessed within the body of the callback
|
| ++ * function
|
| ++ */
|
| ++ void *data;
|
| ++ /**
|
| ++ * A callback function that can be used to modify the DCT coefficients
|
| ++ * after they are losslessly transformed but before they are transcoded to a
|
| ++ * new JPEG image. This allows for custom filters or other transformations
|
| ++ * to be applied in the frequency domain.
|
| ++ *
|
| ++ * @param coeffs pointer to an array of transformed DCT coefficients. (NOTE:
|
| ++ * this pointer is not guaranteed to be valid once the callback
|
| ++ * returns, so applications wishing to hand off the DCT coefficients
|
| ++ * to another function or library should make a copy of them within
|
| ++ * the body of the callback.)
|
| ++ * @param arrayRegion #tjregion structure containing the width and height of
|
| ++ * the array pointed to by <tt>coeffs</tt> as well as its offset
|
| ++ * relative to the component plane. TurboJPEG implementations may
|
| ++ * choose to split each component plane into multiple DCT coefficient
|
| ++ * arrays and call the callback function once for each array.
|
| ++ * @param planeRegion #tjregion structure containing the width and height of
|
| ++ * the component plane to which <tt>coeffs</tt> belongs
|
| ++ * @param componentIndex ID number of the component plane to which
|
| ++ * <tt>coeffs</tt> belongs (Y, Cb, and Cr have, respectively, ID's of
|
| ++ * 0, 1, and 2 in typical JPEG images.)
|
| ++ * @param transformIndex ID number of the transformed image to which
|
| ++ * <tt>coeffs</tt> belongs. This is the same as the index of the
|
| ++ * transform in the <tt>transforms</tt> array that was passed to
|
| ++ * #tjTransform().
|
| ++ * @param transform a pointer to a #tjtransform structure that specifies the
|
| ++ * parameters and/or cropping region for this transform
|
| ++ *
|
| ++ * @return 0 if the callback was successful, or -1 if an error occurred.
|
| ++ */
|
| ++ int (*customFilter)(short *coeffs, tjregion arrayRegion,
|
| ++ tjregion planeRegion, int componentIndex, int transformIndex,
|
| ++ struct tjtransform *transform);
|
| ++} tjtransform;
|
| ++
|
| ++/**
|
| ++ * TurboJPEG instance handle
|
| ++ */
|
| + typedef void* tjhandle;
|
| +
|
| +-#define TJPAD(p) (((p)+3)&(~3))
|
| +-#ifndef max
|
| +- #define max(a,b) ((a)>(b)?(a):(b))
|
| +-#endif
|
| +
|
| ++/**
|
| ++ * Pad the given width to the nearest 32-bit boundary
|
| ++ */
|
| ++#define TJPAD(width) (((width)+3)&(~3))
|
| ++
|
| ++/**
|
| ++ * Compute the scaled value of <tt>dimension</tt> using the given scaling
|
| ++ * factor. This macro performs the integer equivalent of <tt>ceil(dimension *
|
| ++ * scalingFactor)</tt>.
|
| ++ */
|
| ++#define TJSCALED(dimension, scalingFactor) ((dimension * scalingFactor.num \
|
| ++ + scalingFactor.denom - 1) / scalingFactor.denom)
|
| ++
|
| ++
|
| + #ifdef __cplusplus
|
| + extern "C" {
|
| + #endif
|
| +
|
| +-/* API follows */
|
| +
|
| ++/**
|
| ++ * Create a TurboJPEG compressor instance.
|
| ++ *
|
| ++ * @return a handle to the newly-created instance, or NULL if an error
|
| ++ * occurred (see #tjGetErrorStr().)
|
| ++ */
|
| ++DLLEXPORT tjhandle DLLCALL tjInitCompress(void);
|
| +
|
| +-/*
|
| +- tjhandle tjInitCompress(void)
|
| +
|
| +- Creates a new JPEG compressor instance, allocates memory for the structures,
|
| +- and returns a handle to the instance. Most applications will only
|
| +- need to call this once at the beginning of the program or once for each
|
| +- concurrent thread. Don't try to create a new instance every time you
|
| +- compress an image, because this will cause performance to suffer.
|
| +-
|
| +- RETURNS: NULL on error
|
| ++/**
|
| ++ * Compress an RGB or grayscale image into a JPEG image.
|
| + *
|
| -+ * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
|
| -+ * All rights reserved.
|
| -+ * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
| -+ * Copyright (C) 2013-2014, Linaro Limited
|
| -+ * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
|
| ++ * @param handle a handle to a TurboJPEG compressor or transformer instance
|
| ++ * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
|
| ++ * to be compressed
|
| ++ * @param width width (in pixels) of the source image
|
| ++ * @param pitch bytes per line of the source image. Normally, this should be
|
| ++ * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded,
|
| ++ * or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of
|
| ++ * the image is padded to the nearest 32-bit boundary, as is the case
|
| ++ * for Windows bitmaps. You can also be clever and use this parameter
|
| ++ * to skip lines, etc. Setting this parameter to 0 is the equivalent of
|
| ++ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.
|
| ++ * @param height height (in pixels) of the source image
|
| ++ * @param pixelFormat pixel format of the source image (see @ref TJPF
|
| ++ * "Pixel formats".)
|
| ++ * @param jpegBuf address of a pointer to an image buffer that will receive the
|
| ++ * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer
|
| ++ * to accommodate the size of the JPEG image. Thus, you can choose to:
|
| ++ * -# pre-allocate the JPEG buffer with an arbitrary size using
|
| ++ * #tjAlloc() and let TurboJPEG grow the buffer as needed,
|
| ++ * -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the
|
| ++ * buffer for you, or
|
| ++ * -# pre-allocate the buffer to a "worst case" size determined by
|
| ++ * calling #tjBufSize(). This should ensure that the buffer never has
|
| ++ * to be re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
|
| ++ * .
|
| ++ * If you choose option 1, <tt>*jpegSize</tt> should be set to the
|
| ++ * size of your pre-allocated buffer. In any case, unless you have
|
| ++ * set #TJFLAG_NOREALLOC, you should always check <tt>*jpegBuf</tt> upon
|
| ++ * return from this function, as it may have changed.
|
| ++ * @param jpegSize pointer to an unsigned long variable that holds the size of
|
| ++ * the JPEG image buffer. If <tt>*jpegBuf</tt> points to a
|
| ++ * pre-allocated buffer, then <tt>*jpegSize</tt> should be set to the
|
| ++ * size of the buffer. Upon return, <tt>*jpegSize</tt> will contain the
|
| ++ * size of the JPEG image (in bytes.)
|
| ++ * @param jpegSubsamp the level of chrominance subsampling to be used when
|
| ++ * generating the JPEG image (see @ref TJSAMP
|
| ++ * "Chrominance subsampling options".)
|
| ++ * @param jpegQual the image quality of the generated JPEG image (1 = worst,
|
| ++ * 100 = best)
|
| ++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
|
| ++ * "flags".
|
| + *
|
| -+ * This software is provided 'as-is', without any express or implied
|
| -+ * warranty. In no event will the authors be held liable for any damages
|
| -+ * arising from the use of this software.
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| + */
|
| +-DLLEXPORT tjhandle DLLCALL tjInitCompress(void);
|
| ++DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, unsigned char *srcBuf,
|
| ++ int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf,
|
| ++ unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags);
|
| +
|
| +
|
| +-/*
|
| +- int tjCompress(tjhandle j,
|
| +- unsigned char *srcbuf, int width, int pitch, int height, int pixelsize,
|
| +- unsigned char *dstbuf, unsigned long *size,
|
| +- int jpegsubsamp, int jpegqual, int flags)
|
| ++/**
|
| ++ * The maximum size of the buffer (in bytes) required to hold a JPEG image with
|
| ++ * the given parameters. The number of bytes returned by this function is
|
| ++ * larger than the size of the uncompressed source image. The reason for this
|
| ++ * is that the JPEG format uses 16-bit coefficients, and it is thus possible
|
| ++ * for a very high-quality JPEG image with very high-frequency content to
|
| ++ * expand rather than compress when converted to the JPEG format. Such images
|
| ++ * represent a very rare corner case, but since there is no way to predict the
|
| ++ * size of a JPEG image prior to compression, the corner case has to be
|
| ++ * handled.
|
| + *
|
| -+ * Permission is granted to anyone to use this software for any purpose,
|
| -+ * including commercial applications, and to alter it and redistribute it
|
| -+ * freely, subject to the following restrictions:
|
| ++ * @param width width of the image (in pixels)
|
| ++ * @param height height of the image (in pixels)
|
| ++ * @param jpegSubsamp the level of chrominance subsampling to be used when
|
| ++ * generating the JPEG image (see @ref TJSAMP
|
| ++ * "Chrominance subsampling options".)
|
| + *
|
| -+ * 1. The origin of this software must not be misrepresented; you must not
|
| -+ * claim that you wrote the original software. If you use this software
|
| -+ * in a product, an acknowledgment in the product documentation would be
|
| -+ * appreciated but is not required.
|
| -+ * 2. Altered source versions must be plainly marked as such, and must not be
|
| -+ * misrepresented as being the original software.
|
| -+ * 3. This notice may not be removed or altered from any source distribution.
|
| ++ * @return the maximum size of the buffer (in bytes) required to hold the
|
| ++ * image, or -1 if the arguments are out of bounds.
|
| + */
|
| -+
|
| -+#if defined(__linux__) && defined(__ELF__)
|
| -+.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
|
| -+#endif
|
| -+
|
| -+.text
|
| -+.arch armv8-a+fp+simd
|
| -+
|
| -+
|
| -+#define RESPECT_STRICT_ALIGNMENT 1
|
| -+
|
| -+
|
| -+/*****************************************************************************/
|
| -+
|
| -+/* Supplementary macro for setting function attributes */
|
| -+.macro asm_function fname
|
| -+#ifdef __APPLE__
|
| -+ .globl _\fname
|
| -+_\fname:
|
| -+#else
|
| -+ .global \fname
|
| -+#ifdef __ELF__
|
| -+ .hidden \fname
|
| -+ .type \fname, %function
|
| -+#endif
|
| -+\fname:
|
| -+#endif
|
| -+.endm
|
| -+
|
| -+/* Transpose elements of single 128 bit registers */
|
| -+.macro transpose_single x0,x1,xi,xilen,literal
|
| -+ ins \xi\xilen[0], \x0\xilen[0]
|
| -+ ins \x1\xilen[0], \x0\xilen[1]
|
| -+ trn1 \x0\literal, \x0\literal, \x1\literal
|
| -+ trn2 \x1\literal, \xi\literal, \x1\literal
|
| -+.endm
|
| -+
|
| -+/* Transpose elements of 2 differnet registers */
|
| -+.macro transpose x0,x1,xi,xilen,literal
|
| -+ mov \xi\xilen, \x0\xilen
|
| -+ trn1 \x0\literal, \x0\literal, \x1\literal
|
| -+ trn2 \x1\literal, \xi\literal, \x1\literal
|
| -+.endm
|
| -+
|
| -+/* Transpose a block of 4x4 coefficients in four 64-bit registers */
|
| -+.macro transpose_4x4_32 x0,x0len x1,x1len x2,x2len x3,x3len,xi,xilen
|
| -+ mov \xi\xilen, \x0\xilen
|
| -+ trn1 \x0\x0len, \x0\x0len, \x2\x2len
|
| -+ trn2 \x2\x2len, \xi\x0len, \x2\x2len
|
| -+ mov \xi\xilen, \x1\xilen
|
| -+ trn1 \x1\x1len, \x1\x1len, \x3\x3len
|
| -+ trn2 \x3\x3len, \xi\x1len, \x3\x3len
|
| -+.endm
|
| -+
|
| -+.macro transpose_4x4_16 x0,x0len x1,x1len, x2,x2len, x3,x3len,xi,xilen
|
| -+ mov \xi\xilen, \x0\xilen
|
| -+ trn1 \x0\x0len, \x0\x0len, \x1\x1len
|
| -+ trn2 \x1\x2len, \xi\x0len, \x1\x2len
|
| -+ mov \xi\xilen, \x2\xilen
|
| -+ trn1 \x2\x2len, \x2\x2len, \x3\x3len
|
| -+ trn2 \x3\x2len, \xi\x1len, \x3\x3len
|
| -+.endm
|
| -+
|
| -+.macro transpose_4x4 x0, x1, x2, x3,x5
|
| -+ transpose_4x4_16 \x0,.4h, \x1,.4h, \x2,.4h,\x3,.4h,\x5,.16b
|
| -+ transpose_4x4_32 \x0,.2s, \x1,.2s, \x2,.2s,\x3,.2s,\x5,.16b
|
| -+.endm
|
| -+
|
| -+
|
| -+#define CENTERJSAMPLE 128
|
| -+
|
| -+/*****************************************************************************/
|
| -+
|
| -+/*
|
| -+ * Perform dequantization and inverse DCT on one block of coefficients.
|
| ++DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height,
|
| ++ int jpegSubsamp);
|
| +
|
| +- [INPUT] j = instance handle previously returned from a call to
|
| +- tjInitCompress()
|
| +- [INPUT] srcbuf = pointer to user-allocated image buffer containing pixels in
|
| +- RGB(A) or BGR(A) form
|
| +- [INPUT] width = width (in pixels) of the source image
|
| +- [INPUT] pitch = bytes per line of the source image (width*pixelsize if the
|
| +- bitmap is unpadded, else TJPAD(width*pixelsize) if each line of the bitmap
|
| +- is padded to the nearest 32-bit boundary, such as is the case for Windows
|
| +- bitmaps. You can also be clever and use this parameter to skip lines, etc.,
|
| +- as long as the pitch is greater than 0.)
|
| +- [INPUT] height = height (in pixels) of the source image
|
| +- [INPUT] pixelsize = size (in bytes) of each pixel in the source image
|
| +- RGBA and BGRA: 4, RGB and BGR: 3
|
| +- [INPUT] dstbuf = pointer to user-allocated image buffer which will receive
|
| +- the JPEG image. Use the macro TJBUFSIZE(width, height) to determine
|
| +- the appropriate size for this buffer based on the image width and height.
|
| +- [OUTPUT] size = pointer to unsigned long which receives the size (in bytes)
|
| +- of the compressed image
|
| +- [INPUT] jpegsubsamp = Specifies either 4:2:0, 4:2:2, or 4:4:4 subsampling.
|
| +- When the image is converted from the RGB to YCbCr colorspace as part of the
|
| +- JPEG compression process, every other Cb and Cr (chrominance) pixel can be
|
| +- discarded to produce a smaller image with little perceptible loss of
|
| +- image clarity (the human eye is more sensitive to small changes in
|
| +- brightness than small changes in color.)
|
| +
|
| +- TJ_420: 4:2:0 subsampling. Discards every other Cb, Cr pixel in both
|
| +- horizontal and vertical directions.
|
| +- TJ_422: 4:2:2 subsampling. Discards every other Cb, Cr pixel only in
|
| +- the horizontal direction.
|
| +- TJ_444: no subsampling.
|
| +- TJ_GRAYSCALE: Generate grayscale JPEG image
|
| ++/**
|
| ++ * The size of the buffer (in bytes) required to hold a YUV planar image with
|
| ++ * the given parameters.
|
| ++ *
|
| ++ * @param width width of the image (in pixels)
|
| ++ * @param height height of the image (in pixels)
|
| ++ * @param subsamp level of chrominance subsampling in the image (see
|
| ++ * @ref TJSAMP "Chrominance subsampling options".)
|
| + *
|
| -+ * GLOBAL(void)
|
| -+ * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block,
|
| -+ * JSAMPARRAY output_buf, JDIMENSION output_col)
|
| ++ * @return the size of the buffer (in bytes) required to hold the image, or
|
| ++ * -1 if the arguments are out of bounds.
|
| + */
|
| ++DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
|
| ++ int subsamp);
|
| +
|
| +- [INPUT] jpegqual = JPEG quality (an integer between 0 and 100 inclusive.)
|
| +- [INPUT] flags = the bitwise OR of one or more of the following
|
| +
|
| +- TJ_BGR: The components of each pixel in the source image are stored in
|
| +- B,G,R order, not R,G,B
|
| +- TJ_BOTTOMUP: The source image is stored in bottom-up (Windows) order,
|
| +- not top-down
|
| +- TJ_FORCEMMX: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use MMX code (bypass CPU auto-detection)
|
| +- TJ_FORCESSE: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use SSE code (bypass CPU auto-detection)
|
| +- TJ_FORCESSE2: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use SSE2 code (bypass CPU auto-detection)
|
| +- TJ_FORCESSE3: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use SSE3 code (bypass CPU auto-detection)
|
| ++/**
|
| ++ * Encode an RGB or grayscale image into a YUV planar image. This function
|
| ++ * uses the accelerated color conversion routines in TurboJPEG's underlying
|
| ++ * codec to produce a planar YUV image that is suitable for X Video.
|
| ++ * Specifically, if the chrominance components are subsampled along the
|
| ++ * horizontal dimension, then the width of the luminance plane is padded to the
|
| ++ * nearest multiple of 2 in the output image (same goes for the height of the
|
| ++ * luminance plane, if the chrominance components are subsampled along the
|
| ++ * vertical dimension.) Also, each line of each plane in the output image is
|
| ++ * padded to 4 bytes. Although this will work with any subsampling option, it
|
| ++ * is really only useful in combination with TJ_420, which produces an image
|
| ++ * compatible with the I420 (AKA "YUV420P") format.
|
| ++ * <p>
|
| ++ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
|
| ++ * convention of the digital video community, the TurboJPEG API uses "YUV" to
|
| ++ * refer to an image format consisting of Y, Cb, and Cr image planes.
|
| ++ *
|
| ++ * @param handle a handle to a TurboJPEG compressor or transformer instance
|
| ++ * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
|
| ++ * to be encoded
|
| ++ * @param width width (in pixels) of the source image
|
| ++ * @param pitch bytes per line of the source image. Normally, this should be
|
| ++ * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded,
|
| ++ * or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of
|
| ++ * the image is padded to the nearest 32-bit boundary, as is the case
|
| ++ * for Windows bitmaps. You can also be clever and use this parameter
|
| ++ * to skip lines, etc. Setting this parameter to 0 is the equivalent of
|
| ++ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.
|
| ++ * @param height height (in pixels) of the source image
|
| ++ * @param pixelFormat pixel format of the source image (see @ref TJPF
|
| ++ * "Pixel formats".)
|
| ++ * @param dstBuf pointer to an image buffer that will receive the YUV image.
|
| ++ * Use #tjBufSizeYUV() to determine the appropriate size for this buffer
|
| ++ * based on the image width, height, and level of chrominance
|
| ++ * subsampling.
|
| ++ * @param subsamp the level of chrominance subsampling to be used when
|
| ++ * generating the YUV image (see @ref TJSAMP
|
| ++ * "Chrominance subsampling options".)
|
| ++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
|
| ++ * "flags".
|
| ++ *
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| ++*/
|
| ++DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle,
|
| ++ unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat,
|
| ++ unsigned char *dstBuf, int subsamp, int flags);
|
| +
|
| +- RETURNS: 0 on success, -1 on error
|
| +
|
| -+#define FIX_0_298631336 (2446)
|
| -+#define FIX_0_390180644 (3196)
|
| -+#define FIX_0_541196100 (4433)
|
| -+#define FIX_0_765366865 (6270)
|
| -+#define FIX_0_899976223 (7373)
|
| -+#define FIX_1_175875602 (9633)
|
| -+#define FIX_1_501321110 (12299)
|
| -+#define FIX_1_847759065 (15137)
|
| -+#define FIX_1_961570560 (16069)
|
| -+#define FIX_2_053119869 (16819)
|
| -+#define FIX_2_562915447 (20995)
|
| -+#define FIX_3_072711026 (25172)
|
| -+
|
| -+#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560)
|
| -+#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644)
|
| -+#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065)
|
| -+#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447)
|
| -+#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223)
|
| -+#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223)
|
| -+#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447)
|
| -+#define FIX_0_541196100_PLUS_0_765366865 (FIX_0_541196100 + FIX_0_765366865)
|
| -+
|
| -+/*
|
| -+ * Reference SIMD-friendly 1-D ISLOW iDCT C implementation.
|
| -+ * Uses some ideas from the comments in 'simd/jiss2int-64.asm'
|
| ++/**
|
| ++ * Create a TurboJPEG decompressor instance.
|
| ++ *
|
| ++ * @return a handle to the newly-created instance, or NULL if an error
|
| ++ * occurred (see #tjGetErrorStr().)
|
| + */
|
| +-DLLEXPORT int DLLCALL tjCompress(tjhandle j,
|
| +- unsigned char *srcbuf, int width, int pitch, int height, int pixelsize,
|
| +- unsigned char *dstbuf, unsigned long *size,
|
| +- int jpegsubsamp, int jpegqual, int flags);
|
| ++DLLEXPORT tjhandle DLLCALL tjInitDecompress(void);
|
| +
|
| +-DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height);
|
| +
|
| +-/*
|
| +- tjhandle tjInitDecompress(void)
|
| ++/**
|
| ++ * Retrieve information about a JPEG image without decompressing it.
|
| ++ *
|
| ++ * @param handle a handle to a TurboJPEG decompressor or transformer instance
|
| ++ * @param jpegBuf pointer to a buffer containing a JPEG image
|
| ++ * @param jpegSize size of the JPEG image (in bytes)
|
| ++ * @param width pointer to an integer variable that will receive the width (in
|
| ++ * pixels) of the JPEG image
|
| ++ * @param height pointer to an integer variable that will receive the height
|
| ++ * (in pixels) of the JPEG image
|
| ++ * @param jpegSubsamp pointer to an integer variable that will receive the
|
| ++ * level of chrominance subsampling used when compressing the JPEG image
|
| ++ * (see @ref TJSAMP "Chrominance subsampling options".)
|
| ++ *
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| ++*/
|
| ++DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
|
| ++ unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
|
| ++ int *jpegSubsamp);
|
| +
|
| +- Creates a new JPEG decompressor instance, allocates memory for the
|
| +- structures, and returns a handle to the instance. Most applications will
|
| +- only need to call this once at the beginning of the program or once for each
|
| +- concurrent thread. Don't try to create a new instance every time you
|
| +- decompress an image, because this will cause performance to suffer.
|
| +
|
| +- RETURNS: NULL on error
|
| ++/**
|
| ++ * Returns a list of fractional scaling factors that the JPEG decompressor in
|
| ++ * this implementation of TurboJPEG supports.
|
| ++ *
|
| ++ * @param numscalingfactors pointer to an integer variable that will receive
|
| ++ * the number of elements in the list
|
| ++ *
|
| ++ * @return a pointer to a list of fractional scaling factors, or NULL if an
|
| ++ * error is encountered (see #tjGetErrorStr().)
|
| + */
|
| +-DLLEXPORT tjhandle DLLCALL tjInitDecompress(void);
|
| ++DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors);
|
| +
|
| +
|
| +-/*
|
| +- int tjDecompressHeader(tjhandle j,
|
| +- unsigned char *srcbuf, unsigned long size,
|
| +- int *width, int *height)
|
| ++/**
|
| ++ * Decompress a JPEG image to an RGB or grayscale image.
|
| ++ *
|
| ++ * @param handle a handle to a TurboJPEG decompressor or transformer instance
|
| ++ * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
|
| ++ * @param jpegSize size of the JPEG image (in bytes)
|
| ++ * @param dstBuf pointer to an image buffer that will receive the decompressed
|
| ++ * image. This buffer should normally be <tt>pitch * scaledHeight</tt>
|
| ++ * bytes in size, where <tt>scaledHeight</tt> can be determined by
|
| ++ * calling #TJSCALED() with the JPEG image height and one of the scaling
|
| ++ * factors returned by #tjGetScalingFactors(). The <tt>dstBuf</tt>
|
| ++ * pointer may also be used to decompress into a specific region of a
|
| ++ * larger buffer.
|
| ++ * @param width desired width (in pixels) of the destination image. If this is
|
| ++ * different than the width of the JPEG image being decompressed, then
|
| ++ * TurboJPEG will use scaling in the JPEG decompressor to generate the
|
| ++ * largest possible image that will fit within the desired width. If
|
| ++ * <tt>width</tt> is set to 0, then only the height will be considered
|
| ++ * when determining the scaled image size.
|
| ++ * @param pitch bytes per line of the destination image. Normally, this is
|
| ++ * <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt> if the decompressed
|
| ++ * image is unpadded, else <tt>#TJPAD(scaledWidth *
|
| ++ * #tjPixelSize[pixelFormat])</tt> if each line of the decompressed
|
| ++ * image is padded to the nearest 32-bit boundary, as is the case for
|
| ++ * Windows bitmaps. (NOTE: <tt>scaledWidth</tt> can be determined by
|
| ++ * calling #TJSCALED() with the JPEG image width and one of the scaling
|
| ++ * factors returned by #tjGetScalingFactors().) You can also be clever
|
| ++ * and use the pitch parameter to skip lines, etc. Setting this
|
| ++ * parameter to 0 is the equivalent of setting it to <tt>scaledWidth
|
| ++ * * #tjPixelSize[pixelFormat]</tt>.
|
| ++ * @param height desired height (in pixels) of the destination image. If this
|
| ++ * is different than the height of the JPEG image being decompressed,
|
| ++ * then TurboJPEG will use scaling in the JPEG decompressor to generate
|
| ++ * the largest possible image that will fit within the desired height.
|
| ++ * If <tt>height</tt> is set to 0, then only the width will be
|
| ++ * considered when determining the scaled image size.
|
| ++ * @param pixelFormat pixel format of the destination image (see @ref
|
| ++ * TJPF "Pixel formats".)
|
| ++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
|
| ++ * "flags".
|
| ++ *
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| + */
|
| -+#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \
|
| -+{ \
|
| -+ DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \
|
| -+ INT32 q1, q2, q3, q4, q5, q6, q7; \
|
| -+ INT32 tmp11_plus_tmp2, tmp11_minus_tmp2; \
|
| -+ \
|
| -+ /* 1-D iDCT input data */ \
|
| -+ row0 = xrow0; \
|
| -+ row1 = xrow1; \
|
| -+ row2 = xrow2; \
|
| -+ row3 = xrow3; \
|
| -+ row4 = xrow4; \
|
| -+ row5 = xrow5; \
|
| -+ row6 = xrow6; \
|
| -+ row7 = xrow7; \
|
| -+ \
|
| -+ q5 = row7 + row3; \
|
| -+ q4 = row5 + row1; \
|
| -+ q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \
|
| -+ MULTIPLY(q4, FIX_1_175875602); \
|
| -+ q7 = MULTIPLY(q5, FIX_1_175875602) + \
|
| -+ MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \
|
| -+ q2 = MULTIPLY(row2, FIX_0_541196100) + \
|
| -+ MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \
|
| -+ q4 = q6; \
|
| -+ q3 = ((INT32) row0 - (INT32) row4) << 13; \
|
| -+ q6 += MULTIPLY(row5, -FIX_2_562915447) + \
|
| -+ MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \
|
| -+ /* now we can use q1 (reloadable constants have been used up) */ \
|
| -+ q1 = q3 + q2; \
|
| -+ q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \
|
| -+ MULTIPLY(row1, -FIX_0_899976223); \
|
| -+ q5 = q7; \
|
| -+ q1 = q1 + q6; \
|
| -+ q7 += MULTIPLY(row7, -FIX_0_899976223) + \
|
| -+ MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \
|
| -+ \
|
| -+ /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \
|
| -+ tmp11_plus_tmp2 = q1; \
|
| -+ row1 = 0; \
|
| -+ \
|
| -+ q1 = q1 - q6; \
|
| -+ q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \
|
| -+ MULTIPLY(row3, -FIX_2_562915447); \
|
| -+ q1 = q1 - q6; \
|
| -+ q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \
|
| -+ MULTIPLY(row6, FIX_0_541196100); \
|
| -+ q3 = q3 - q2; \
|
| -+ \
|
| -+ /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \
|
| -+ tmp11_minus_tmp2 = q1; \
|
| -+ \
|
| -+ q1 = ((INT32) row0 + (INT32) row4) << 13; \
|
| -+ q2 = q1 + q6; \
|
| -+ q1 = q1 - q6; \
|
| -+ \
|
| -+ /* pick up the results */ \
|
| -+ tmp0 = q4; \
|
| -+ tmp1 = q5; \
|
| -+ tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \
|
| -+ tmp3 = q7; \
|
| -+ tmp10 = q2; \
|
| -+ tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \
|
| -+ tmp12 = q3; \
|
| -+ tmp13 = q1; \
|
| -+}
|
| -+
|
| -+#define XFIX_0_899976223 v0.4h[0]
|
| -+#define XFIX_0_541196100 v0.4h[1]
|
| -+#define XFIX_2_562915447 v0.4h[2]
|
| -+#define XFIX_0_298631336_MINUS_0_899976223 v0.4h[3]
|
| -+#define XFIX_1_501321110_MINUS_0_899976223 v1.4h[0]
|
| -+#define XFIX_2_053119869_MINUS_2_562915447 v1.4h[1]
|
| -+#define XFIX_0_541196100_PLUS_0_765366865 v1.4h[2]
|
| -+#define XFIX_1_175875602 v1.4h[3]
|
| -+#define XFIX_1_175875602_MINUS_0_390180644 v2.4h[0]
|
| -+#define XFIX_0_541196100_MINUS_1_847759065 v2.4h[1]
|
| -+#define XFIX_3_072711026_MINUS_2_562915447 v2.4h[2]
|
| -+#define XFIX_1_175875602_MINUS_1_961570560 v2.4h[3]
|
| -+
|
| -+.balign 16
|
| -+jsimd_idct_islow_neon_consts:
|
| -+ .short FIX_0_899976223 /* d0[0] */
|
| -+ .short FIX_0_541196100 /* d0[1] */
|
| -+ .short FIX_2_562915447 /* d0[2] */
|
| -+ .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */
|
| -+ .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */
|
| -+ .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */
|
| -+ .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */
|
| -+ .short FIX_1_175875602 /* d1[3] */
|
| -+ /* reloadable constants */
|
| -+ .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */
|
| -+ .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */
|
| -+ .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */
|
| -+ .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */
|
| -+
|
| -+asm_function jsimd_idct_islow_neon
|
| -+
|
| -+ DCT_TABLE .req x0
|
| -+ COEF_BLOCK .req x1
|
| -+ OUTPUT_BUF .req x2
|
| -+ OUTPUT_COL .req x3
|
| -+ TMP1 .req x0
|
| -+ TMP2 .req x1
|
| -+ TMP3 .req x2
|
| -+ TMP4 .req x15
|
| -+
|
| -+ ROW0L .req v16
|
| -+ ROW0R .req v17
|
| -+ ROW1L .req v18
|
| -+ ROW1R .req v19
|
| -+ ROW2L .req v20
|
| -+ ROW2R .req v21
|
| -+ ROW3L .req v22
|
| -+ ROW3R .req v23
|
| -+ ROW4L .req v24
|
| -+ ROW4R .req v25
|
| -+ ROW5L .req v26
|
| -+ ROW5R .req v27
|
| -+ ROW6L .req v28
|
| -+ ROW6R .req v29
|
| -+ ROW7L .req v30
|
| -+ ROW7R .req v31
|
| -+ /* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */
|
| -+ sub sp, sp, 272
|
| -+ str x15, [sp], 16
|
| -+ adr x15, jsimd_idct_islow_neon_consts
|
| -+ st1 {v0.8b - v3.8b}, [sp], 32
|
| -+ st1 {v4.8b - v7.8b}, [sp], 32
|
| -+ st1 {v8.8b - v11.8b}, [sp], 32
|
| -+ st1 {v12.8b - v15.8b}, [sp], 32
|
| -+ st1 {v16.8b - v19.8b}, [sp], 32
|
| -+ st1 {v20.8b - v23.8b}, [sp], 32
|
| -+ st1 {v24.8b - v27.8b}, [sp], 32
|
| -+ st1 {v28.8b - v31.8b}, [sp], 32
|
| -+ ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32
|
| -+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
|
| -+ ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32
|
| -+ mul v16.4h, v16.4h, v0.4h
|
| -+ mul v17.4h, v17.4h, v1.4h
|
| -+ ins v16.2d[1], v17.2d[0] /* 128 bit q8 */
|
| -+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
|
| -+ mul v18.4h, v18.4h, v2.4h
|
| -+ mul v19.4h, v19.4h, v3.4h
|
| -+ ins v18.2d[1], v19.2d[0] /* 128 bit q9 */
|
| -+ ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32
|
| -+ mul v20.4h, v20.4h, v4.4h
|
| -+ mul v21.4h, v21.4h, v5.4h
|
| -+ ins v20.2d[1], v21.2d[0] /* 128 bit q10 */
|
| -+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
|
| -+ mul v22.4h, v22.4h, v6.4h
|
| -+ mul v23.4h, v23.4h, v7.4h
|
| -+ ins v22.2d[1], v23.2d[0] /* 128 bit q11 */
|
| -+ ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK]
|
| -+ mul v24.4h, v24.4h, v0.4h
|
| -+ mul v25.4h, v25.4h, v1.4h
|
| -+ ins v24.2d[1], v25.2d[0] /* 128 bit q12 */
|
| -+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
|
| -+ mul v28.4h, v28.4h, v4.4h
|
| -+ mul v29.4h, v29.4h, v5.4h
|
| -+ ins v28.2d[1], v29.2d[0] /* 128 bit q14 */
|
| -+ mul v26.4h, v26.4h, v2.4h
|
| -+ mul v27.4h, v27.4h, v3.4h
|
| -+ ins v26.2d[1], v27.2d[0] /* 128 bit q13 */
|
| -+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x15] /* load constants */
|
| -+ add x15, x15, #16
|
| -+ mul v30.4h, v30.4h, v6.4h
|
| -+ mul v31.4h, v31.4h, v7.4h
|
| -+ ins v30.2d[1], v31.2d[0] /* 128 bit q15 */
|
| -+ /* Go to the bottom of the stack */
|
| -+ sub sp, sp, 352
|
| -+ stp x4, x5, [sp], 16
|
| -+ st1 {v8.4h - v11.4h}, [sp], 32 /* save NEON registers */
|
| -+ st1 {v12.4h - v15.4h}, [sp], 32
|
| -+ /* 1-D IDCT, pass 1, left 4x8 half */
|
| -+ add v4.4h, ROW7L.4h, ROW3L.4h
|
| -+ add v5.4h, ROW5L.4h, ROW1L.4h
|
| -+ smull v12.4s, v4.4h, XFIX_1_175875602_MINUS_1_961570560
|
| -+ smlal v12.4s, v5.4h, XFIX_1_175875602
|
| -+ smull v14.4s, v4.4h, XFIX_1_175875602
|
| -+ /* Check for the zero coefficients in the right 4x8 half */
|
| -+ smlal v14.4s, v5.4h, XFIX_1_175875602_MINUS_0_390180644
|
| -+ ssubl v6.4s, ROW0L.4h, ROW4L.4h
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))]
|
| -+ smull v4.4s, ROW2L.4h, XFIX_0_541196100
|
| -+ smlal v4.4s, ROW6L.4h, XFIX_0_541196100_MINUS_1_847759065
|
| -+ orr x0, x4, x5
|
| -+ mov v8.16b, v12.16b
|
| -+ smlsl v12.4s, ROW5L.4h, XFIX_2_562915447
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))]
|
| -+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
|
| -+ shl v6.4s, v6.4s, #13
|
| -+ orr x0, x0, x4
|
| -+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223
|
| -+ orr x0, x0 , x5
|
| -+ add v2.4s, v6.4s, v4.4s
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))]
|
| -+ mov v10.16b, v14.16b
|
| -+ add v2.4s, v2.4s, v12.4s
|
| -+ orr x0, x0, x4
|
| -+ smlsl v14.4s, ROW7L.4h, XFIX_0_899976223
|
| -+ orr x0, x0, x5
|
| -+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
|
| -+ rshrn ROW1L.4h, v2.4s, #11
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))]
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smlal v10.4s, ROW5L.4h, XFIX_2_053119869_MINUS_2_562915447
|
| -+ orr x0, x0, x4
|
| -+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447
|
| -+ orr x0, x0, x5
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))]
|
| -+ smlal v12.4s, ROW6L.4h, XFIX_0_541196100
|
| -+ sub v6.4s, v6.4s, v4.4s
|
| -+ orr x0, x0, x4
|
| -+ rshrn ROW6L.4h, v2.4s, #11
|
| -+ orr x0, x0, x5
|
| -+ add v2.4s, v6.4s, v10.4s
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))]
|
| -+ sub v6.4s, v6.4s, v10.4s
|
| -+ saddl v10.4s, ROW0L.4h, ROW4L.4h
|
| -+ orr x0, x0, x4
|
| -+ rshrn ROW2L.4h, v2.4s, #11
|
| -+ orr x0, x0, x5
|
| -+ rshrn ROW5L.4h, v6.4s, #11
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))]
|
| -+ shl v10.4s, v10.4s, #13
|
| -+ smlal v8.4s, ROW7L.4h, XFIX_0_298631336_MINUS_0_899976223
|
| -+ orr x0, x0, x4
|
| -+ add v4.4s, v10.4s, v12.4s
|
| -+ orr x0, x0, x5
|
| -+ cmp x0, #0 /* orrs instruction removed */
|
| -+ sub v2.4s, v10.4s, v12.4s
|
| -+ add v12.4s, v4.4s, v14.4s
|
| -+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))]
|
| -+ sub v4.4s, v4.4s, v14.4s
|
| -+ add v10.4s, v2.4s, v8.4s
|
| -+ orr x0, x4, x5
|
| -+ sub v6.4s, v2.4s, v8.4s
|
| -+ /* pop {x4, x5} */
|
| -+ sub sp, sp, 80
|
| -+ ldp x4, x5, [sp], 16
|
| -+ rshrn ROW7L.4h, v4.4s, #11
|
| -+ rshrn ROW3L.4h, v10.4s, #11
|
| -+ rshrn ROW0L.4h, v12.4s, #11
|
| -+ rshrn ROW4L.4h, v6.4s, #11
|
| -+
|
| -+ beq 3f /* Go to do some special handling for the sparse right 4x8 half */
|
| -+
|
| -+ /* 1-D IDCT, pass 1, right 4x8 half */
|
| -+ ld1 {v2.4h}, [x15] /* reload constants */
|
| -+ add v10.4h, ROW7R.4h, ROW3R.4h
|
| -+ add v8.4h, ROW5R.4h, ROW1R.4h
|
| -+ /* Transpose ROW6L <-> ROW7L (v3 available free register) */
|
| -+ transpose ROW6L, ROW7L, v3, .16b, .4h
|
| -+ smull v12.4s, v10.4h, XFIX_1_175875602_MINUS_1_961570560
|
| -+ smlal v12.4s, v8.4h, XFIX_1_175875602
|
| -+ /* Transpose ROW2L <-> ROW3L (v3 available free register) */
|
| -+ transpose ROW2L, ROW3L, v3, .16b, .4h
|
| -+ smull v14.4s, v10.4h, XFIX_1_175875602
|
| -+ smlal v14.4s, v8.4h, XFIX_1_175875602_MINUS_0_390180644
|
| -+ /* Transpose ROW0L <-> ROW1L (v3 available free register) */
|
| -+ transpose ROW0L, ROW1L, v3, .16b, .4h
|
| -+ ssubl v6.4s, ROW0R.4h, ROW4R.4h
|
| -+ smull v4.4s, ROW2R.4h, XFIX_0_541196100
|
| -+ smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065
|
| -+ /* Transpose ROW4L <-> ROW5L (v3 available free register) */
|
| -+ transpose ROW4L, ROW5L, v3, .16b, .4h
|
| -+ mov v8.16b, v12.16b
|
| -+ smlsl v12.4s, ROW5R.4h, XFIX_2_562915447
|
| -+ smlal v12.4s, ROW3R.4h, XFIX_3_072711026_MINUS_2_562915447
|
| -+ /* Transpose ROW1L <-> ROW3L (v3 available free register) */
|
| -+ transpose ROW1L, ROW3L, v3, .16b, .2s
|
| -+ shl v6.4s, v6.4s, #13
|
| -+ smlsl v8.4s, ROW1R.4h, XFIX_0_899976223
|
| -+ /* Transpose ROW4L <-> ROW6L (v3 available free register) */
|
| -+ transpose ROW4L, ROW6L, v3, .16b, .2s
|
| -+ add v2.4s, v6.4s, v4.4s
|
| -+ mov v10.16b, v14.16b
|
| -+ add v2.4s, v2.4s, v12.4s
|
| -+ /* Transpose ROW0L <-> ROW2L (v3 available free register) */
|
| -+ transpose ROW0L, ROW2L, v3, .16b, .2s
|
| -+ smlsl v14.4s, ROW7R.4h, XFIX_0_899976223
|
| -+ smlal v14.4s, ROW1R.4h, XFIX_1_501321110_MINUS_0_899976223
|
| -+ rshrn ROW1R.4h, v2.4s, #11
|
| -+ /* Transpose ROW5L <-> ROW7L (v3 available free register) */
|
| -+ transpose ROW5L, ROW7L, v3, .16b, .2s
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447
|
| -+ smlsl v10.4s, ROW3R.4h, XFIX_2_562915447
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smull v12.4s, ROW2R.4h, XFIX_0_541196100_PLUS_0_765366865
|
| -+ smlal v12.4s, ROW6R.4h, XFIX_0_541196100
|
| -+ sub v6.4s, v6.4s, v4.4s
|
| -+ rshrn ROW6R.4h, v2.4s, #11
|
| -+ add v2.4s, v6.4s, v10.4s
|
| -+ sub v6.4s, v6.4s, v10.4s
|
| -+ saddl v10.4s, ROW0R.4h, ROW4R.4h
|
| -+ rshrn ROW2R.4h, v2.4s, #11
|
| -+ rshrn ROW5R.4h, v6.4s, #11
|
| -+ shl v10.4s, v10.4s, #13
|
| -+ smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223
|
| -+ add v4.4s, v10.4s, v12.4s
|
| -+ sub v2.4s, v10.4s, v12.4s
|
| -+ add v12.4s, v4.4s, v14.4s
|
| -+ sub v4.4s, v4.4s, v14.4s
|
| -+ add v10.4s, v2.4s, v8.4s
|
| -+ sub v6.4s, v2.4s, v8.4s
|
| -+ rshrn ROW7R.4h, v4.4s, #11
|
| -+ rshrn ROW3R.4h, v10.4s, #11
|
| -+ rshrn ROW0R.4h, v12.4s, #11
|
| -+ rshrn ROW4R.4h, v6.4s, #11
|
| -+ /* Transpose right 4x8 half */
|
| -+ transpose ROW6R, ROW7R, v3, .16b, .4h
|
| -+ transpose ROW2R, ROW3R, v3, .16b, .4h
|
| -+ transpose ROW0R, ROW1R, v3, .16b, .4h
|
| -+ transpose ROW4R, ROW5R, v3, .16b, .4h
|
| -+ transpose ROW1R, ROW3R, v3, .16b, .2s
|
| -+ transpose ROW4R, ROW6R, v3, .16b, .2s
|
| -+ transpose ROW0R, ROW2R, v3, .16b, .2s
|
| -+ transpose ROW5R, ROW7R, v3, .16b, .2s
|
| -+
|
| -+1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */
|
| -+ ld1 {v2.4h}, [x15] /* reload constants */
|
| -+ smull v12.4S, ROW1R.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ smlal v12.4s, ROW1L.4h, XFIX_1_175875602
|
| -+ smlal v12.4s, ROW3R.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560
|
| -+ smull v14.4s, ROW3R.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ smlal v14.4s, ROW3L.4h, XFIX_1_175875602
|
| -+ smlal v14.4s, ROW1R.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644
|
| -+ ssubl v6.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */
|
| -+ smull v4.4s, ROW2L.4h, XFIX_0_541196100
|
| -+ smlal v4.4s, ROW2R.4h, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ mov v8.16b, v12.16b
|
| -+ smlsl v12.4s, ROW1R.4h, XFIX_2_562915447 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
|
| -+ shl v6.4s, v6.4s, #13
|
| -+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223
|
| -+ add v2.4s, v6.4s, v4.4s
|
| -+ mov v10.16b, v14.16b
|
| -+ add v2.4s, v2.4s, v12.4s
|
| -+ smlsl v14.4s, ROW3R.4h, XFIX_0_899976223 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
|
| -+ shrn ROW1L.4h, v2.4s, #16
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smlal v10.4s, ROW1R.4h, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
|
| -+ smlal v12.4s, ROW2R.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ sub v6.4s, v6.4s, v4.4s
|
| -+ shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ add v2.4s, v6.4s, v10.4s
|
| -+ sub v6.4s, v6.4s, v10.4s
|
| -+ saddl v10.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */
|
| -+ shrn ROW2L.4h, v2.4s, #16
|
| -+ shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ shl v10.4s, v10.4s, #13
|
| -+ smlal v8.4s, ROW3R.4h, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ add v4.4s, v10.4s, v12.4s
|
| -+ sub v2.4s, v10.4s, v12.4s
|
| -+ add v12.4s, v4.4s, v14.4s
|
| -+ sub v4.4s, v4.4s, v14.4s
|
| -+ add v10.4s, v2.4s, v8.4s
|
| -+ sub v6.4s, v2.4s, v8.4s
|
| -+ shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ shrn ROW3L.4h, v10.4s, #16
|
| -+ shrn ROW0L.4h, v12.4s, #16
|
| -+ shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
|
| -+ /* 1-D IDCT, pass 2, right 4x8 half */
|
| -+ ld1 {v2.4h}, [x15] /* reload constants */
|
| -+ smull v12.4s, ROW5R.4h, XFIX_1_175875602
|
| -+ smlal v12.4s, ROW5L.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ smlal v12.4s, ROW7R.4h, XFIX_1_175875602_MINUS_1_961570560
|
| -+ smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ smull v14.4s, ROW7R.4h, XFIX_1_175875602
|
| -+ smlal v14.4s, ROW7L.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ smlal v14.4s, ROW5R.4h, XFIX_1_175875602_MINUS_0_390180644
|
| -+ smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ ssubl v6.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */
|
| -+ smull v4.4s, ROW6L.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065
|
| -+ mov v8.16b, v12.16b
|
| -+ smlsl v12.4s, ROW5R.4h, XFIX_2_562915447
|
| -+ smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ shl v6.4s, v6.4s, #13
|
| -+ smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ add v2.4s, v6.4s, v4.4s
|
| -+ mov v10.16b, v14.16b
|
| -+ add v2.4s, v2.4s, v12.4s
|
| -+ smlsl v14.4s, ROW7R.4h, XFIX_0_899976223
|
| -+ smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447
|
| -+ smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ smlal v12.4s, ROW6R.4h, XFIX_0_541196100
|
| -+ sub v6.4s, v6.4s, v4.4s
|
| -+ shrn ROW6R.4h, v2.4s, #16
|
| -+ add v2.4s, v6.4s, v10.4s
|
| -+ sub v6.4s, v6.4s, v10.4s
|
| -+ saddl v10.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */
|
| -+ shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ shrn ROW5R.4h, v6.4s, #16
|
| -+ shl v10.4s, v10.4s, #13
|
| -+ smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223
|
| -+ add v4.4s, v10.4s, v12.4s
|
| -+ sub v2.4s, v10.4s, v12.4s
|
| -+ add v12.4s, v4.4s, v14.4s
|
| -+ sub v4.4s, v4.4s, v14.4s
|
| -+ add v10.4s, v2.4s, v8.4s
|
| -+ sub v6.4s, v2.4s, v8.4s
|
| -+ shrn ROW7R.4h, v4.4s, #16
|
| -+ shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
|
| -+ shrn ROW4R.4h, v6.4s, #16
|
| -+
|
| -+2: /* Descale to 8-bit and range limit */
|
| -+ ins v16.2d[1], v17.2d[0]
|
| -+ ins v18.2d[1], v19.2d[0]
|
| -+ ins v20.2d[1], v21.2d[0]
|
| -+ ins v22.2d[1], v23.2d[0]
|
| -+ sqrshrn v16.8b, v16.8h, #2
|
| -+ sqrshrn2 v16.16b, v18.8h, #2
|
| -+ sqrshrn v18.8b, v20.8h, #2
|
| -+ sqrshrn2 v18.16b, v22.8h, #2
|
| -+
|
| -+ /* vpop {v8.4h - d15.4h} */ /* restore NEON registers */
|
| -+ ld1 {v8.4h - v11.4h}, [sp], 32
|
| -+ ld1 {v12.4h - v15.4h}, [sp], 32
|
| -+ ins v24.2d[1], v25.2d[0]
|
| -+
|
| -+ sqrshrn v20.8b, v24.8h, #2
|
| -+ /* Transpose the final 8-bit samples and do signed->unsigned conversion */
|
| -+ /* trn1 v16.8h, v16.8h, v18.8h */
|
| -+ transpose v16, v18, v3, .16b, .8h
|
| -+ ins v26.2d[1], v27.2d[0]
|
| -+ ins v28.2d[1], v29.2d[0]
|
| -+ ins v30.2d[1], v31.2d[0]
|
| -+ sqrshrn2 v20.16b, v26.8h, #2
|
| -+ sqrshrn v22.8b, v28.8h, #2
|
| -+ movi v0.16b, #(CENTERJSAMPLE)
|
| -+ sqrshrn2 v22.16b, v30.8h, #2
|
| -+ transpose_single v16, v17, v3, .2d, .8b
|
| -+ transpose_single v18, v19, v3, .2d, .8b
|
| -+ add v16.8b, v16.8b, v0.8b
|
| -+ add v17.8b, v17.8b, v0.8b
|
| -+ add v18.8b, v18.8b, v0.8b
|
| -+ add v19.8b, v19.8b, v0.8b
|
| -+ transpose v20, v22, v3, .16b, .8h
|
| -+ /* Store results to the output buffer */
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ st1 {v16.8b}, [TMP1]
|
| -+ transpose_single v20, v21, v3, .2d, .8b
|
| -+ st1 {v17.8b}, [TMP2]
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ st1 {v18.8b}, [TMP1]
|
| -+ add v20.8b, v20.8b, v0.8b
|
| -+ add v21.8b, v21.8b, v0.8b
|
| -+ st1 {v19.8b}, [TMP2]
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ ldp TMP3, TMP4, [OUTPUT_BUF]
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ add TMP3, TMP3, OUTPUT_COL
|
| -+ add TMP4, TMP4, OUTPUT_COL
|
| -+ transpose_single v22, v23, v3, .2d, .8b
|
| -+ st1 {v20.8b}, [TMP1]
|
| -+ add v22.8b, v22.8b, v0.8b
|
| -+ add v23.8b, v23.8b, v0.8b
|
| -+ st1 {v21.8b}, [TMP2]
|
| -+ st1 {v22.8b}, [TMP3]
|
| -+ st1 {v23.8b}, [TMP4]
|
| -+ ldr x15, [sp], 16
|
| -+ ld1 {v0.8b - v3.8b}, [sp], 32
|
| -+ ld1 {v4.8b - v7.8b}, [sp], 32
|
| -+ ld1 {v8.8b - v11.8b}, [sp], 32
|
| -+ ld1 {v12.8b - v15.8b}, [sp], 32
|
| -+ ld1 {v16.8b - v19.8b}, [sp], 32
|
| -+ ld1 {v20.8b - v23.8b}, [sp], 32
|
| -+ ld1 {v24.8b - v27.8b}, [sp], 32
|
| -+ ld1 {v28.8b - v31.8b}, [sp], 32
|
| -+ blr x30
|
| -+
|
| -+3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */
|
| -+
|
| -+ /* Transpose left 4x8 half */
|
| -+ transpose ROW6L, ROW7L, v3, .16b, .4h
|
| -+ transpose ROW2L, ROW3L, v3, .16b, .4h
|
| -+ transpose ROW0L, ROW1L, v3, .16b, .4h
|
| -+ transpose ROW4L, ROW5L, v3, .16b, .4h
|
| -+ shl ROW0R.4h, ROW0R.4h, #2 /* PASS1_BITS */
|
| -+ transpose ROW1L, ROW3L, v3, .16b, .2s
|
| -+ transpose ROW4L, ROW6L, v3, .16b, .2s
|
| -+ transpose ROW0L, ROW2L, v3, .16b, .2s
|
| -+ transpose ROW5L, ROW7L, v3, .16b, .2s
|
| -+ cmp x0, #0
|
| -+ beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */
|
| -+
|
| -+ /* Only row 0 is non-zero for the right 4x8 half */
|
| -+ dup ROW1R.4h, ROW0R.4h[1]
|
| -+ dup ROW2R.4h, ROW0R.4h[2]
|
| -+ dup ROW3R.4h, ROW0R.4h[3]
|
| -+ dup ROW4R.4h, ROW0R.4h[0]
|
| -+ dup ROW5R.4h, ROW0R.4h[1]
|
| -+ dup ROW6R.4h, ROW0R.4h[2]
|
| -+ dup ROW7R.4h, ROW0R.4h[3]
|
| -+ dup ROW0R.4h, ROW0R.4h[0]
|
| -+ b 1b /* Go to 'normal' second pass */
|
| -+
|
| -+4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */
|
| -+ ld1 {v2.4h}, [x15] /* reload constants */
|
| -+ smull v12.4s, ROW1L.4h, XFIX_1_175875602
|
| -+ smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560
|
| -+ smull v14.4s, ROW3L.4h, XFIX_1_175875602
|
| -+ smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644
|
| -+ smull v4.4s, ROW2L.4h, XFIX_0_541196100
|
| -+ sshll v6.4s, ROW0L.4h, #13
|
| -+ mov v8.16b, v12.16b
|
| -+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
|
| -+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223
|
| -+ add v2.4s, v6.4s, v4.4s
|
| -+ mov v10.16b, v14.16b
|
| -+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
|
| -+ add v2.4s, v2.4s, v12.4s
|
| -+ add v12.4s, v12.4s, v12.4s
|
| -+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447
|
| -+ shrn ROW1L.4h, v2.4s, #16
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
|
| -+ sub v6.4s, v6.4s, v4.4s
|
| -+ shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ add v2.4s, v6.4s, v10.4s
|
| -+ sub v6.4s, v6.4s, v10.4s
|
| -+ sshll v10.4s, ROW0L.4h, #13
|
| -+ shrn ROW2L.4h, v2.4s, #16
|
| -+ shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ add v4.4s, v10.4s, v12.4s
|
| -+ sub v2.4s, v10.4s, v12.4s
|
| -+ add v12.4s, v4.4s, v14.4s
|
| -+ sub v4.4s, v4.4s, v14.4s
|
| -+ add v10.4s, v2.4s, v8.4s
|
| -+ sub v6.4s, v2.4s, v8.4s
|
| -+ shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ shrn ROW3L.4h, v10.4s, #16
|
| -+ shrn ROW0L.4h, v12.4s, #16
|
| -+ shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
|
| -+ /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */
|
| -+ ld1 {v2.4h}, [x15] /* reload constants */
|
| -+ smull v12.4s, ROW5L.4h, XFIX_1_175875602
|
| -+ smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560
|
| -+ smull v14.4s, ROW7L.4h, XFIX_1_175875602
|
| -+ smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644
|
| -+ smull v4.4s, ROW6L.4h, XFIX_0_541196100
|
| -+ sshll v6.4s, ROW4L.4h, #13
|
| -+ mov v8.16b, v12.16b
|
| -+ smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447
|
| -+ smlsl v8.4s, ROW5L.4h, XFIX_0_899976223
|
| -+ add v2.4s, v6.4s, v4.4s
|
| -+ mov v10.16b, v14.16b
|
| -+ smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223
|
| -+ add v2.4s, v2.4s, v12.4s
|
| -+ add v12.4s, v12.4s, v12.4s
|
| -+ smlsl v10.4s, ROW7L.4h, XFIX_2_562915447
|
| -+ shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
|
| -+ sub v2.4s, v2.4s, v12.4s
|
| -+ smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865
|
| -+ sub v6.4s, v6.4s, v4.4s
|
| -+ shrn ROW6R.4h, v2.4s, #16
|
| -+ add v2.4s, v6.4s, v10.4s
|
| -+ sub v6.4s, v6.4s, v10.4s
|
| -+ sshll v10.4s, ROW4L.4h, #13
|
| -+ shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
|
| -+ shrn ROW5R.4h, v6.4s, #16
|
| -+ add v4.4s, v10.4s, v12.4s
|
| -+ sub v2.4s, v10.4s, v12.4s
|
| -+ add v12.4s, v4.4s, v14.4s
|
| -+ sub v4.4s, v4.4s, v14.4s
|
| -+ add v10.4s, v2.4s, v8.4s
|
| -+ sub v6.4s, v2.4s, v8.4s
|
| -+ shrn ROW7R.4h, v4.4s, #16
|
| -+ shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
|
| -+ shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
|
| -+ shrn ROW4R.4h, v6.4s, #16
|
| -+ b 2b /* Go to epilogue */
|
| -+
|
| -+ .unreq DCT_TABLE
|
| -+ .unreq COEF_BLOCK
|
| -+ .unreq OUTPUT_BUF
|
| -+ .unreq OUTPUT_COL
|
| -+ .unreq TMP1
|
| -+ .unreq TMP2
|
| -+ .unreq TMP3
|
| -+ .unreq TMP4
|
| -+
|
| -+ .unreq ROW0L
|
| -+ .unreq ROW0R
|
| -+ .unreq ROW1L
|
| -+ .unreq ROW1R
|
| -+ .unreq ROW2L
|
| -+ .unreq ROW2R
|
| -+ .unreq ROW3L
|
| -+ .unreq ROW3R
|
| -+ .unreq ROW4L
|
| -+ .unreq ROW4R
|
| -+ .unreq ROW5L
|
| -+ .unreq ROW5R
|
| -+ .unreq ROW6L
|
| -+ .unreq ROW6R
|
| -+ .unreq ROW7L
|
| -+ .unreq ROW7R
|
| -+
|
| -+
|
| -+/*****************************************************************************/
|
| -+
|
| -+/*
|
| -+ * jsimd_idct_ifast_neon
|
| ++DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle,
|
| ++ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
|
| ++ int width, int pitch, int height, int pixelFormat, int flags);
|
| +
|
| +- [INPUT] j = instance handle previously returned from a call to
|
| +- tjInitDecompress()
|
| +- [INPUT] srcbuf = pointer to a user-allocated buffer containing the JPEG image
|
| +- to decompress
|
| +- [INPUT] size = size of the JPEG image buffer (in bytes)
|
| +- [OUTPUT] width = width (in pixels) of the JPEG image
|
| +- [OUTPUT] height = height (in pixels) of the JPEG image
|
| +
|
| +- RETURNS: 0 on success, -1 on error
|
| +-*/
|
| +-DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle j,
|
| +- unsigned char *srcbuf, unsigned long size,
|
| +- int *width, int *height);
|
| ++/**
|
| ++ * Decompress a JPEG image to a YUV planar image. This function performs JPEG
|
| ++ * decompression but leaves out the color conversion step, so a planar YUV
|
| ++ * image is generated instead of an RGB image. The padding of the planes in
|
| ++ * this image is the same as in the images generated by #tjEncodeYUV2(). Note
|
| ++ * that, if the width or height of the image is not an even multiple of the MCU
|
| ++ * block size (see #tjMCUWidth and #tjMCUHeight), then an intermediate buffer
|
| ++ * copy will be performed within TurboJPEG.
|
| ++ * <p>
|
| ++ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
|
| ++ * convention of the digital video community, the TurboJPEG API uses "YUV" to
|
| ++ * refer to an image format consisting of Y, Cb, and Cr image planes.
|
| + *
|
| -+ * This function contains a fast, not so accurate integer implementation of
|
| -+ * the inverse DCT (Discrete Cosine Transform). It uses the same calculations
|
| -+ * and produces exactly the same output as IJG's original 'jpeg_idct_ifast'
|
| -+ * function from jidctfst.c
|
| ++ * @param handle a handle to a TurboJPEG decompressor or transformer instance
|
| ++ * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
|
| ++ * @param jpegSize size of the JPEG image (in bytes)
|
| ++ * @param dstBuf pointer to an image buffer that will receive the YUV image.
|
| ++ * Use #tjBufSizeYUV() to determine the appropriate size for this buffer
|
| ++ * based on the image width, height, and level of subsampling.
|
| ++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
|
| ++ * "flags".
|
| + *
|
| -+ * Normally 1-D AAN DCT needs 5 multiplications and 29 additions.
|
| -+ * But in ARM NEON case some extra additions are required because VQDMULH
|
| -+ * instruction can't handle the constants larger than 1. So the expressions
|
| -+ * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x",
|
| -+ * which introduces an extra addition. Overall, there are 6 extra additions
|
| -+ * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions.
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| + */
|
| -+
|
| -+#define XFIX_1_082392200 v0.4h[0]
|
| -+#define XFIX_1_414213562 v0.4h[1]
|
| -+#define XFIX_1_847759065 v0.4h[2]
|
| -+#define XFIX_2_613125930 v0.4h[3]
|
| -+
|
| -+.balign 16
|
| -+jsimd_idct_ifast_neon_consts:
|
| -+ .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
|
| -+ .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
|
| -+ .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
|
| -+ .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
|
| -+
|
| -+asm_function jsimd_idct_ifast_neon
|
| -+
|
| -+ DCT_TABLE .req x0
|
| -+ COEF_BLOCK .req x1
|
| -+ OUTPUT_BUF .req x2
|
| -+ OUTPUT_COL .req x3
|
| -+ TMP1 .req x0
|
| -+ TMP2 .req x1
|
| -+ TMP3 .req x2
|
| -+ TMP4 .req x22
|
| -+ TMP5 .req x23
|
| -+
|
| -+ /* Load and dequantize coefficients into NEON registers
|
| -+ * with the following allocation:
|
| -+ * 0 1 2 3 | 4 5 6 7
|
| -+ * ---------+--------
|
| -+ * 0 | d16 | d17 ( v8.8h )
|
| -+ * 1 | d18 | d19 ( v9.8h )
|
| -+ * 2 | d20 | d21 ( v10.8h )
|
| -+ * 3 | d22 | d23 ( v11.8h )
|
| -+ * 4 | d24 | d25 ( v12.8h )
|
| -+ * 5 | d26 | d27 ( v13.8h )
|
| -+ * 6 | d28 | d29 ( v14.8h )
|
| -+ * 7 | d30 | d31 ( v15.8h )
|
| -+ */
|
| -+ /* Save NEON registers used in fast IDCT */
|
| -+ sub sp, sp, #176
|
| -+ stp x22, x23, [sp], 16
|
| -+ adr x23, jsimd_idct_ifast_neon_consts
|
| -+ st1 {v0.8b - v3.8b}, [sp], 32
|
| -+ st1 {v4.8b - v7.8b}, [sp], 32
|
| -+ st1 {v8.8b - v11.8b}, [sp], 32
|
| -+ st1 {v12.8b - v15.8b}, [sp], 32
|
| -+ st1 {v16.8b - v19.8b}, [sp], 32
|
| -+ ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32
|
| -+ ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
|
| -+ ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32
|
| -+ mul v8.8h, v8.8h, v0.8h
|
| -+ ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32
|
| -+ mul v9.8h, v9.8h, v1.8h
|
| -+ ld1 {v12.8h, v13.8h}, [COEF_BLOCK], 32
|
| -+ mul v10.8h, v10.8h, v2.8h
|
| -+ ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
|
| -+ mul v11.8h, v11.8h, v3.8h
|
| -+ ld1 {v14.8h, v15.8h}, [COEF_BLOCK], 32
|
| -+ mul v12.8h, v12.8h, v0.8h
|
| -+ ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32
|
| -+ mul v14.8h, v14.8h, v2.8h
|
| -+ mul v13.8h, v13.8h, v1.8h
|
| -+ ld1 {v0.4h}, [x23] /* load constants */
|
| -+ mul v15.8h, v15.8h, v3.8h
|
| -+
|
| -+ /* 1-D IDCT, pass 1 */
|
| -+ sub v2.8h, v10.8h, v14.8h
|
| -+ add v14.8h, v10.8h, v14.8h
|
| -+ sub v1.8h, v11.8h, v13.8h
|
| -+ add v13.8h, v11.8h, v13.8h
|
| -+ sub v5.8h, v9.8h, v15.8h
|
| -+ add v15.8h, v9.8h, v15.8h
|
| -+ sqdmulh v4.8h, v2.8h, XFIX_1_414213562
|
| -+ sqdmulh v6.8h, v1.8h, XFIX_2_613125930
|
| -+ add v3.8h, v1.8h, v1.8h
|
| -+ sub v1.8h, v5.8h, v1.8h
|
| -+ add v10.8h, v2.8h, v4.8h
|
| -+ sqdmulh v4.8h, v1.8h, XFIX_1_847759065
|
| -+ sub v2.8h, v15.8h, v13.8h
|
| -+ add v3.8h, v3.8h, v6.8h
|
| -+ sqdmulh v6.8h, v2.8h, XFIX_1_414213562
|
| -+ add v1.8h, v1.8h, v4.8h
|
| -+ sqdmulh v4.8h, v5.8h, XFIX_1_082392200
|
| -+ sub v10.8h, v10.8h, v14.8h
|
| -+ add v2.8h, v2.8h, v6.8h
|
| -+ sub v6.8h, v8.8h, v12.8h
|
| -+ add v12.8h, v8.8h, v12.8h
|
| -+ add v9.8h, v5.8h, v4.8h
|
| -+ add v5.8h, v6.8h, v10.8h
|
| -+ sub v10.8h, v6.8h, v10.8h
|
| -+ add v6.8h, v15.8h, v13.8h
|
| -+ add v8.8h, v12.8h, v14.8h
|
| -+ sub v3.8h, v6.8h, v3.8h
|
| -+ sub v12.8h, v12.8h, v14.8h
|
| -+ sub v3.8h, v3.8h, v1.8h
|
| -+ sub v1.8h, v9.8h, v1.8h
|
| -+ add v2.8h, v3.8h, v2.8h
|
| -+ sub v15.8h, v8.8h, v6.8h
|
| -+ add v1.8h, v1.8h, v2.8h
|
| -+ add v8.8h, v8.8h, v6.8h
|
| -+ add v14.8h, v5.8h, v3.8h
|
| -+ sub v9.8h, v5.8h, v3.8h
|
| -+ sub v13.8h, v10.8h, v2.8h
|
| -+ add v10.8h, v10.8h, v2.8h
|
| -+ /* Transpose q8-q9 */
|
| -+ mov v18.16b, v8.16b
|
| -+ trn1 v8.8h, v8.8h, v9.8h
|
| -+ trn2 v9.8h, v18.8h, v9.8h
|
| -+ sub v11.8h, v12.8h, v1.8h
|
| -+ /* Transpose q14-q15 */
|
| -+ mov v18.16b, v14.16b
|
| -+ trn1 v14.8h, v14.8h, v15.8h
|
| -+ trn2 v15.8h, v18.8h, v15.8h
|
| -+ add v12.8h, v12.8h, v1.8h
|
| -+ /* Transpose q10-q11 */
|
| -+ mov v18.16b, v10.16b
|
| -+ trn1 v10.8h, v10.8h, v11.8h
|
| -+ trn2 v11.8h, v18.8h, v11.8h
|
| -+ /* Transpose q12-q13 */
|
| -+ mov v18.16b, v12.16b
|
| -+ trn1 v12.8h, v12.8h, v13.8h
|
| -+ trn2 v13.8h, v18.8h, v13.8h
|
| -+ /* Transpose q9-q11 */
|
| -+ mov v18.16b, v9.16b
|
| -+ trn1 v9.4s, v9.4s, v11.4s
|
| -+ trn2 v11.4s, v18.4s, v11.4s
|
| -+ /* Transpose q12-q14 */
|
| -+ mov v18.16b, v12.16b
|
| -+ trn1 v12.4s, v12.4s, v14.4s
|
| -+ trn2 v14.4s, v18.4s, v14.4s
|
| -+ /* Transpose q8-q10 */
|
| -+ mov v18.16b, v8.16b
|
| -+ trn1 v8.4s, v8.4s, v10.4s
|
| -+ trn2 v10.4s, v18.4s, v10.4s
|
| -+ /* Transpose q13-q15 */
|
| -+ mov v18.16b, v13.16b
|
| -+ trn1 v13.4s, v13.4s, v15.4s
|
| -+ trn2 v15.4s, v18.4s, v15.4s
|
| -+ /* vswp v14.4h, v10-MSB.4h */
|
| -+ umov x22, v14.d[0]
|
| -+ ins v14.2d[0], v10.2d[1]
|
| -+ ins v10.2d[1], x22
|
| -+ /* vswp v13.4h, v9MSB.4h */
|
| -+
|
| -+ umov x22, v13.d[0]
|
| -+ ins v13.2d[0], v9.2d[1]
|
| -+ ins v9.2d[1], x22
|
| -+ /* 1-D IDCT, pass 2 */
|
| -+ sub v2.8h, v10.8h, v14.8h
|
| -+ /* vswp v15.4h, v11MSB.4h */
|
| -+ umov x22, v15.d[0]
|
| -+ ins v15.2d[0], v11.2d[1]
|
| -+ ins v11.2d[1], x22
|
| -+ add v14.8h, v10.8h, v14.8h
|
| -+ /* vswp v12.4h, v8-MSB.4h */
|
| -+ umov x22, v12.d[0]
|
| -+ ins v12.2d[0], v8.2d[1]
|
| -+ ins v8.2d[1], x22
|
| -+ sub v1.8h, v11.8h, v13.8h
|
| -+ add v13.8h, v11.8h, v13.8h
|
| -+ sub v5.8h, v9.8h, v15.8h
|
| -+ add v15.8h, v9.8h, v15.8h
|
| -+ sqdmulh v4.8h, v2.8h, XFIX_1_414213562
|
| -+ sqdmulh v6.8h, v1.8h, XFIX_2_613125930
|
| -+ add v3.8h, v1.8h, v1.8h
|
| -+ sub v1.8h, v5.8h, v1.8h
|
| -+ add v10.8h, v2.8h, v4.8h
|
| -+ sqdmulh v4.8h, v1.8h, XFIX_1_847759065
|
| -+ sub v2.8h, v15.8h, v13.8h
|
| -+ add v3.8h, v3.8h, v6.8h
|
| -+ sqdmulh v6.8h, v2.8h, XFIX_1_414213562
|
| -+ add v1.8h, v1.8h, v4.8h
|
| -+ sqdmulh v4.8h, v5.8h, XFIX_1_082392200
|
| -+ sub v10.8h, v10.8h, v14.8h
|
| -+ add v2.8h, v2.8h, v6.8h
|
| -+ sub v6.8h, v8.8h, v12.8h
|
| -+ add v12.8h, v8.8h, v12.8h
|
| -+ add v9.8h, v5.8h, v4.8h
|
| -+ add v5.8h, v6.8h, v10.8h
|
| -+ sub v10.8h, v6.8h, v10.8h
|
| -+ add v6.8h, v15.8h, v13.8h
|
| -+ add v8.8h, v12.8h, v14.8h
|
| -+ sub v3.8h, v6.8h, v3.8h
|
| -+ sub v12.8h, v12.8h, v14.8h
|
| -+ sub v3.8h, v3.8h, v1.8h
|
| -+ sub v1.8h, v9.8h, v1.8h
|
| -+ add v2.8h, v3.8h, v2.8h
|
| -+ sub v15.8h, v8.8h, v6.8h
|
| -+ add v1.8h, v1.8h, v2.8h
|
| -+ add v8.8h, v8.8h, v6.8h
|
| -+ add v14.8h, v5.8h, v3.8h
|
| -+ sub v9.8h, v5.8h, v3.8h
|
| -+ sub v13.8h, v10.8h, v2.8h
|
| -+ add v10.8h, v10.8h, v2.8h
|
| -+ sub v11.8h, v12.8h, v1.8h
|
| -+ add v12.8h, v12.8h, v1.8h
|
| -+ /* Descale to 8-bit and range limit */
|
| -+ movi v0.16b, #0x80
|
| -+ sqshrn v8.8b, v8.8h, #5
|
| -+ sqshrn2 v8.16b, v9.8h, #5
|
| -+ sqshrn v9.8b, v10.8h, #5
|
| -+ sqshrn2 v9.16b, v11.8h, #5
|
| -+ sqshrn v10.8b, v12.8h, #5
|
| -+ sqshrn2 v10.16b, v13.8h, #5
|
| -+ sqshrn v11.8b, v14.8h, #5
|
| -+ sqshrn2 v11.16b, v15.8h, #5
|
| -+ add v8.16b, v8.16b, v0.16b
|
| -+ add v9.16b, v9.16b, v0.16b
|
| -+ add v10.16b, v10.16b, v0.16b
|
| -+ add v11.16b, v11.16b, v0.16b
|
| -+ /* Transpose the final 8-bit samples */
|
| -+ /* Transpose q8-q9 */
|
| -+ mov v18.16b, v8.16b
|
| -+ trn1 v8.8h, v8.8h, v9.8h
|
| -+ trn2 v9.8h, v18.8h, v9.8h
|
| -+ /* Transpose q10-q11 */
|
| -+ mov v18.16b, v10.16b
|
| -+ trn1 v10.8h, v10.8h, v11.8h
|
| -+ trn2 v11.8h, v18.8h, v11.8h
|
| -+ /* Transpose q8-q10 */
|
| -+ mov v18.16b, v8.16b
|
| -+ trn1 v8.4s, v8.4s, v10.4s
|
| -+ trn2 v10.4s, v18.4s, v10.4s
|
| -+ /* Transpose q9-q11 */
|
| -+ mov v18.16b, v9.16b
|
| -+ trn1 v9.4s, v9.4s, v11.4s
|
| -+ trn2 v11.4s, v18.4s, v11.4s
|
| -+ /* make copy */
|
| -+ ins v17.2d[0], v8.2d[1]
|
| -+ /* Transpose d16-d17-msb */
|
| -+ mov v18.16b, v8.16b
|
| -+ trn1 v8.8b, v8.8b, v17.8b
|
| -+ trn2 v17.8b, v18.8b, v17.8b
|
| -+ /* make copy */
|
| -+ ins v19.2d[0], v9.2d[1]
|
| -+ mov v18.16b, v9.16b
|
| -+ trn1 v9.8b, v9.8b, v19.8b
|
| -+ trn2 v19.8b, v18.8b, v19.8b
|
| -+ /* Store results to the output buffer */
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ st1 {v8.8b}, [TMP1]
|
| -+ st1 {v17.8b}, [TMP2]
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ st1 {v9.8b}, [TMP1]
|
| -+ /* make copy */
|
| -+ ins v7.2d[0], v10.2d[1]
|
| -+ mov v18.16b, v10.16b
|
| -+ trn1 v10.8b, v10.8b, v7.8b
|
| -+ trn2 v7.8b, v18.8b, v7.8b
|
| -+ st1 {v19.8b}, [TMP2]
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ ldp TMP4, TMP5, [OUTPUT_BUF], 16
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ add TMP4, TMP4, OUTPUT_COL
|
| -+ add TMP5, TMP5, OUTPUT_COL
|
| -+ st1 {v10.8b}, [TMP1]
|
| -+ /* make copy */
|
| -+ ins v16.2d[0], v11.2d[1]
|
| -+ mov v18.16b, v11.16b
|
| -+ trn1 v11.8b, v11.8b, v16.8b
|
| -+ trn2 v16.8b, v18.8b, v16.8b
|
| -+ st1 {v7.8b}, [TMP2]
|
| -+ st1 {v11.8b}, [TMP4]
|
| -+ st1 {v16.8b}, [TMP5]
|
| -+ sub sp, sp, #176
|
| -+ ldp x22, x23, [sp], 16
|
| -+ ld1 {v0.8b - v3.8b}, [sp], 32
|
| -+ ld1 {v4.8b - v7.8b}, [sp], 32
|
| -+ ld1 {v8.8b - v11.8b}, [sp], 32
|
| -+ ld1 {v12.8b - v15.8b}, [sp], 32
|
| -+ ld1 {v16.8b - v19.8b}, [sp], 32
|
| -+ blr x30
|
| -+
|
| -+ .unreq DCT_TABLE
|
| -+ .unreq COEF_BLOCK
|
| -+ .unreq OUTPUT_BUF
|
| -+ .unreq OUTPUT_COL
|
| -+ .unreq TMP1
|
| -+ .unreq TMP2
|
| -+ .unreq TMP3
|
| -+ .unreq TMP4
|
| -+
|
| -+
|
| -+/*****************************************************************************/
|
| -+
|
| -+/*
|
| -+ * jsimd_idct_4x4_neon
|
| ++DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
|
| ++ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
|
| ++ int flags);
|
| +
|
| +
|
| +-/*
|
| +- int tjDecompress(tjhandle j,
|
| +- unsigned char *srcbuf, unsigned long size,
|
| +- unsigned char *dstbuf, int width, int pitch, int height, int pixelsize,
|
| +- int flags)
|
| ++/**
|
| ++ * Create a new TurboJPEG transformer instance.
|
| + *
|
| -+ * This function contains inverse-DCT code for getting reduced-size
|
| -+ * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations
|
| -+ * and produces exactly the same output as IJG's original 'jpeg_idct_4x4'
|
| -+ * function from jpeg-6b (jidctred.c).
|
| ++ * @return a handle to the newly-created instance, or NULL if an error
|
| ++ * occurred (see #tjGetErrorStr().)
|
| ++ */
|
| ++DLLEXPORT tjhandle DLLCALL tjInitTransform(void);
|
| +
|
| +- [INPUT] j = instance handle previously returned from a call to
|
| +- tjInitDecompress()
|
| +- [INPUT] srcbuf = pointer to a user-allocated buffer containing the JPEG image
|
| +- to decompress
|
| +- [INPUT] size = size of the JPEG image buffer (in bytes)
|
| +- [INPUT] dstbuf = pointer to user-allocated image buffer which will receive
|
| +- the bitmap image. This buffer should normally be pitch*height
|
| +- bytes in size, although this pointer may also be used to decompress into
|
| +- a specific region of a larger buffer.
|
| +- [INPUT] width = width (in pixels) of the destination image
|
| +- [INPUT] pitch = bytes per line of the destination image (width*pixelsize if the
|
| +- bitmap is unpadded, else TJPAD(width*pixelsize) if each line of the bitmap
|
| +- is padded to the nearest 32-bit boundary, such as is the case for Windows
|
| +- bitmaps. You can also be clever and use this parameter to skip lines, etc.,
|
| +- as long as the pitch is greater than 0.)
|
| +- [INPUT] height = height (in pixels) of the destination image
|
| +- [INPUT] pixelsize = size (in bytes) of each pixel in the destination image
|
| +- RGBA/RGBx and BGRA/BGRx: 4, RGB and BGR: 3
|
| +- [INPUT] flags = the bitwise OR of one or more of the following
|
| +
|
| +- TJ_BGR: The components of each pixel in the destination image should be
|
| +- written in B,G,R order, not R,G,B
|
| +- TJ_BOTTOMUP: The destination image should be stored in bottom-up
|
| +- (Windows) order, not top-down
|
| +- TJ_FORCEMMX: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use MMX code (bypass CPU auto-detection)
|
| +- TJ_FORCESSE: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use SSE code (bypass CPU auto-detection)
|
| +- TJ_FORCESSE2: Valid only for the Intel Performance Primitives implementation
|
| +- of this codec-- force IPP to use SSE2 code (bypass CPU auto-detection)
|
| ++/**
|
| ++ * Losslessly transform a JPEG image into another JPEG image. Lossless
|
| ++ * transforms work by moving the raw coefficients from one JPEG image structure
|
| ++ * to another without altering the values of the coefficients. While this is
|
| ++ * typically faster than decompressing the image, transforming it, and
|
| ++ * re-compressing it, lossless transforms are not free. Each lossless
|
| ++ * transform requires reading and performing Huffman decoding on all of the
|
| ++ * coefficients in the source image, regardless of the size of the destination
|
| ++ * image. Thus, this function provides a means of generating multiple
|
| ++ * transformed images from the same source or applying multiple
|
| ++ * transformations simultaneously, in order to eliminate the need to read the
|
| ++ * source coefficients multiple times.
|
| + *
|
| -+ * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which
|
| -+ * requires much less arithmetic operations and hence should be faster.
|
| -+ * The primary purpose of this particular NEON optimized function is
|
| -+ * bit exact compatibility with jpeg-6b.
|
| ++ * @param handle a handle to a TurboJPEG transformer instance
|
| ++ * @param jpegBuf pointer to a buffer containing the JPEG image to transform
|
| ++ * @param jpegSize size of the JPEG image (in bytes)
|
| ++ * @param n the number of transformed JPEG images to generate
|
| ++ * @param dstBufs pointer to an array of n image buffers. <tt>dstBufs[i]</tt>
|
| ++ * will receive a JPEG image that has been transformed using the
|
| ++ * parameters in <tt>transforms[i]</tt>. TurboJPEG has the ability to
|
| ++ * reallocate the JPEG buffer to accommodate the size of the JPEG image.
|
| ++ * Thus, you can choose to:
|
| ++ * -# pre-allocate the JPEG buffer with an arbitrary size using
|
| ++ * #tjAlloc() and let TurboJPEG grow the buffer as needed,
|
| ++ * -# set <tt>dstBufs[i]</tt> to NULL to tell TurboJPEG to allocate the
|
| ++ * buffer for you, or
|
| ++ * -# pre-allocate the buffer to a "worst case" size determined by
|
| ++ * calling #tjBufSize() with the transformed or cropped width and
|
| ++ * height. This should ensure that the buffer never has to be
|
| ++ * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
|
| ++ * .
|
| ++ * If you choose option 1, <tt>dstSizes[i]</tt> should be set to
|
| ++ * the size of your pre-allocated buffer. In any case, unless you have
|
| ++ * set #TJFLAG_NOREALLOC, you should always check <tt>dstBufs[i]</tt>
|
| ++ * upon return from this function, as it may have changed.
|
| ++ * @param dstSizes pointer to an array of n unsigned long variables that will
|
| ++ * receive the actual sizes (in bytes) of each transformed JPEG image.
|
| ++ * If <tt>dstBufs[i]</tt> points to a pre-allocated buffer, then
|
| ++ * <tt>dstSizes[i]</tt> should be set to the size of the buffer. Upon
|
| ++ * return, <tt>dstSizes[i]</tt> will contain the size of the JPEG image
|
| ++ * (in bytes.)
|
| ++ * @param transforms pointer to an array of n #tjtransform structures, each of
|
| ++ * which specifies the transform parameters and/or cropping region for
|
| ++ * the corresponding transformed output image.
|
| ++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
|
| ++ * "flags".
|
| + *
|
| -+ * TODO: a bit better instructions scheduling can be achieved by expanding
|
| -+ * idct_helper/transpose_4x4 macros and reordering instructions,
|
| -+ * but readability will suffer somewhat.
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| + */
|
| -+
|
| -+#define CONST_BITS 13
|
| -+
|
| -+#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
|
| -+#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
|
| -+#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
|
| -+#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
|
| -+#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
|
| -+#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
|
| -+#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
|
| -+#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
|
| -+#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
|
| -+#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
|
| -+#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
|
| -+#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
|
| -+#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
|
| -+#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
|
| -+
|
| -+.balign 16
|
| -+jsimd_idct_4x4_neon_consts:
|
| -+ .short FIX_1_847759065 /* v0.4h[0] */
|
| -+ .short -FIX_0_765366865 /* v0.4h[1] */
|
| -+ .short -FIX_0_211164243 /* v0.4h[2] */
|
| -+ .short FIX_1_451774981 /* v0.4h[3] */
|
| -+ .short -FIX_2_172734803 /* d1[0] */
|
| -+ .short FIX_1_061594337 /* d1[1] */
|
| -+ .short -FIX_0_509795579 /* d1[2] */
|
| -+ .short -FIX_0_601344887 /* d1[3] */
|
| -+ .short FIX_0_899976223 /* v2.4h[0] */
|
| -+ .short FIX_2_562915447 /* v2.4h[1] */
|
| -+ .short 1 << (CONST_BITS+1) /* v2.4h[2] */
|
| -+ .short 0 /* v2.4h[3] */
|
| -+
|
| -+.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
|
| -+ smull v28.4s, \x4, v2.4h[2]
|
| -+ smlal v28.4s, \x8, v0.4h[0]
|
| -+ smlal v28.4s, \x14, v0.4h[1]
|
| -+
|
| -+ smull v26.4s, \x16, v1.4h[2]
|
| -+ smlal v26.4s, \x12, v1.4h[3]
|
| -+ smlal v26.4s, \x10, v2.4h[0]
|
| -+ smlal v26.4s, \x6, v2.4h[1]
|
| -+
|
| -+ smull v30.4s, \x4, v2.4h[2]
|
| -+ smlsl v30.4s, \x8, v0.4h[0]
|
| -+ smlsl v30.4s, \x14, v0.4h[1]
|
| -+
|
| -+ smull v24.4s, \x16, v0.4h[2]
|
| -+ smlal v24.4s, \x12, v0.4h[3]
|
| -+ smlal v24.4s, \x10, v1.4h[0]
|
| -+ smlal v24.4s, \x6, v1.4h[1]
|
| -+
|
| -+ add v20.4s, v28.4s, v26.4s
|
| -+ sub v28.4s, v28.4s, v26.4s
|
| -+
|
| -+.if \shift > 16
|
| -+ srshr v20.4s, v20.4s, #\shift
|
| -+ srshr v28.4s, v28.4s, #\shift
|
| -+ xtn \y26, v20.4s
|
| -+ xtn \y29, v28.4s
|
| -+.else
|
| -+ rshrn \y26, v20.4s, #\shift
|
| -+ rshrn \y29, v28.4s, #\shift
|
| -+.endif
|
| -+
|
| -+ add v20.4s, v30.4s, v24.4s
|
| -+ sub v30.4s, v30.4s, v24.4s
|
| -+
|
| -+.if \shift > 16
|
| -+ srshr v20.4s, v20.4s, #\shift
|
| -+ srshr v30.4s, v30.4s, #\shift
|
| -+ xtn \y27, v20.4s
|
| -+ xtn \y28, v30.4s
|
| -+.else
|
| -+ rshrn \y27, v20.4s, #\shift
|
| -+ rshrn \y28, v30.4s, #\shift
|
| -+.endif
|
| -+
|
| -+.endm
|
| -+
|
| -+asm_function jsimd_idct_4x4_neon
|
| -+
|
| -+ DCT_TABLE .req x0
|
| -+ COEF_BLOCK .req x1
|
| -+ OUTPUT_BUF .req x2
|
| -+ OUTPUT_COL .req x3
|
| -+ TMP1 .req x0
|
| -+ TMP2 .req x1
|
| -+ TMP3 .req x2
|
| -+ TMP4 .req x15
|
| -+
|
| -+ /* Save all used NEON registers */
|
| -+ sub sp, sp, 272
|
| -+ str x15, [sp], 16
|
| -+ /* Load constants (v3.4h is just used for padding) */
|
| -+ adr TMP4, jsimd_idct_4x4_neon_consts
|
| -+ st1 {v0.8b - v3.8b}, [sp], 32
|
| -+ st1 {v4.8b - v7.8b}, [sp], 32
|
| -+ st1 {v8.8b - v11.8b}, [sp], 32
|
| -+ st1 {v12.8b - v15.8b}, [sp], 32
|
| -+ st1 {v16.8b - v19.8b}, [sp], 32
|
| -+ st1 {v20.8b - v23.8b}, [sp], 32
|
| -+ st1 {v24.8b - v27.8b}, [sp], 32
|
| -+ st1 {v28.8b - v31.8b}, [sp], 32
|
| -+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
|
| -+
|
| -+ /* Load all COEF_BLOCK into NEON registers with the following allocation:
|
| -+ * 0 1 2 3 | 4 5 6 7
|
| -+ * ---------+--------
|
| -+ * 0 | v4.4h | v5.4h
|
| -+ * 1 | v6.4h | v7.4h
|
| -+ * 2 | v8.4h | v9.4h
|
| -+ * 3 | v10.4h | v11.4h
|
| -+ * 4 | - | -
|
| -+ * 5 | v12.4h | v13.4h
|
| -+ * 6 | v14.4h | v15.4h
|
| -+ * 7 | v16.4h | v17.4h
|
| -+ */
|
| -+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
|
| -+ ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32
|
| -+ add COEF_BLOCK, COEF_BLOCK, #16
|
| -+ ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32
|
| -+ ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16
|
| -+ /* dequantize */
|
| -+ ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
|
| -+ mul v4.4h, v4.4h, v18.4h
|
| -+ mul v5.4h, v5.4h, v19.4h
|
| -+ ins v4.2d[1], v5.2d[0] /* 128 bit q4 */
|
| -+ ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32
|
| -+ mul v6.4h, v6.4h, v20.4h
|
| -+ mul v7.4h, v7.4h, v21.4h
|
| -+ ins v6.2d[1], v7.2d[0] /* 128 bit q6 */
|
| -+ mul v8.4h, v8.4h, v22.4h
|
| -+ mul v9.4h, v9.4h, v23.4h
|
| -+ ins v8.2d[1], v9.2d[0] /* 128 bit q8 */
|
| -+ add DCT_TABLE, DCT_TABLE, #16
|
| -+ ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32
|
| -+ mul v10.4h, v10.4h, v24.4h
|
| -+ mul v11.4h, v11.4h, v25.4h
|
| -+ ins v10.2d[1], v11.2d[0] /* 128 bit q10 */
|
| -+ mul v12.4h, v12.4h, v26.4h
|
| -+ mul v13.4h, v13.4h, v27.4h
|
| -+ ins v12.2d[1], v13.2d[0] /* 128 bit q12 */
|
| -+ ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
|
| -+ mul v14.4h, v14.4h, v28.4h
|
| -+ mul v15.4h, v15.4h, v29.4h
|
| -+ ins v14.2d[1], v15.2d[0] /* 128 bit q14 */
|
| -+ mul v16.4h, v16.4h, v30.4h
|
| -+ mul v17.4h, v17.4h, v31.4h
|
| -+ ins v16.2d[1], v17.2d[0] /* 128 bit q16 */
|
| -+
|
| -+ /* Pass 1 */
|
| -+ idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h
|
| -+ transpose_4x4 v4, v6, v8, v10, v3
|
| -+ ins v10.2d[1], v11.2d[0]
|
| -+ idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h
|
| -+ transpose_4x4 v5, v7, v9, v11, v3
|
| -+ ins v10.2d[1], v11.2d[0]
|
| -+ /* Pass 2 */
|
| -+ idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h
|
| -+ transpose_4x4 v26, v27, v28, v29, v3
|
| -+
|
| -+ /* Range limit */
|
| -+ movi v30.8h, #0x80
|
| -+ ins v26.2d[1], v27.2d[0]
|
| -+ ins v28.2d[1], v29.2d[0]
|
| -+ add v26.8h, v26.8h, v30.8h
|
| -+ add v28.8h, v28.8h, v30.8h
|
| -+ sqxtun v26.8b, v26.8h
|
| -+ sqxtun v27.8b, v28.8h
|
| -+
|
| -+ /* Store results to the output buffer */
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
| -+ ldp TMP3, TMP4, [OUTPUT_BUF]
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+ add TMP3, TMP3, OUTPUT_COL
|
| -+ add TMP4, TMP4, OUTPUT_COL
|
| -+
|
| -+#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT
|
| -+ /* We can use much less instructions on little endian systems if the
|
| -+ * OS kernel is not configured to trap unaligned memory accesses
|
| -+ */
|
| -+ st1 {v26.s}[0], [TMP1], 4
|
| -+ st1 {v27.s}[0], [TMP3], 4
|
| -+ st1 {v26.s}[1], [TMP2], 4
|
| -+ st1 {v27.s}[1], [TMP4], 4
|
| -+#else
|
| -+ st1 {v26.b}[0], [TMP1], 1
|
| -+ st1 {v27.b}[0], [TMP3], 1
|
| -+ st1 {v26.b}[1], [TMP1], 1
|
| -+ st1 {v27.b}[1], [TMP3], 1
|
| -+ st1 {v26.b}[2], [TMP1], 1
|
| -+ st1 {v27.b}[2], [TMP3], 1
|
| -+ st1 {v26.b}[3], [TMP1], 1
|
| -+ st1 {v27.b}[3], [TMP3], 1
|
| -+
|
| -+ st1 {v26.b}[4], [TMP2], 1
|
| -+ st1 {v27.b}[4], [TMP4], 1
|
| -+ st1 {v26.b}[5], [TMP2], 1
|
| -+ st1 {v27.b}[5], [TMP4], 1
|
| -+ st1 {v26.b}[6], [TMP2], 1
|
| -+ st1 {v27.b}[6], [TMP4], 1
|
| -+ st1 {v26.b}[7], [TMP2], 1
|
| -+ st1 {v27.b}[7], [TMP4], 1
|
| -+#endif
|
| -+
|
| -+ /* vpop {v8.4h - v15.4h} ;not available */
|
| -+ sub sp, sp, #272
|
| -+ ldr x15, [sp], 16
|
| -+ ld1 {v0.8b - v3.8b}, [sp], 32
|
| -+ ld1 {v4.8b - v7.8b}, [sp], 32
|
| -+ ld1 {v8.8b - v11.8b}, [sp], 32
|
| -+ ld1 {v12.8b - v15.8b}, [sp], 32
|
| -+ ld1 {v16.8b - v19.8b}, [sp], 32
|
| -+ ld1 {v20.8b - v23.8b}, [sp], 32
|
| -+ ld1 {v24.8b - v27.8b}, [sp], 32
|
| -+ ld1 {v28.8b - v31.8b}, [sp], 32
|
| -+ blr x30
|
| -+
|
| -+ .unreq DCT_TABLE
|
| -+ .unreq COEF_BLOCK
|
| -+ .unreq OUTPUT_BUF
|
| -+ .unreq OUTPUT_COL
|
| -+ .unreq TMP1
|
| -+ .unreq TMP2
|
| -+ .unreq TMP3
|
| -+ .unreq TMP4
|
| -+
|
| -+.purgem idct_helper
|
| -+
|
| -+
|
| -+/*****************************************************************************/
|
| -+
|
| -+/*
|
| -+ * jsimd_idct_2x2_neon
|
| ++DLLEXPORT int DLLCALL tjTransform(tjhandle handle, unsigned char *jpegBuf,
|
| ++ unsigned long jpegSize, int n, unsigned char **dstBufs,
|
| ++ unsigned long *dstSizes, tjtransform *transforms, int flags);
|
| +
|
| +- RETURNS: 0 on success, -1 on error
|
| +-*/
|
| +-DLLEXPORT int DLLCALL tjDecompress(tjhandle j,
|
| +- unsigned char *srcbuf, unsigned long size,
|
| +- unsigned char *dstbuf, int width, int pitch, int height, int pixelsize,
|
| +- int flags);
|
| +
|
| ++/**
|
| ++ * Destroy a TurboJPEG compressor, decompressor, or transformer instance.
|
| + *
|
| -+ * This function contains inverse-DCT code for getting reduced-size
|
| -+ * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations
|
| -+ * and produces exactly the same output as IJG's original 'jpeg_idct_2x2'
|
| -+ * function from jpeg-6b (jidctred.c).
|
| ++ * @param handle a handle to a TurboJPEG compressor, decompressor or
|
| ++ * transformer instance
|
| + *
|
| -+ * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which
|
| -+ * requires much less arithmetic operations and hence should be faster.
|
| -+ * The primary purpose of this particular NEON optimized function is
|
| -+ * bit exact compatibility with jpeg-6b.
|
| ++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
|
| + */
|
| ++DLLEXPORT int DLLCALL tjDestroy(tjhandle handle);
|
| +
|
| +-/*
|
| +- int tjDestroy(tjhandle h)
|
| +
|
| +- Frees structures associated with a compression or decompression instance
|
| +-
|
| +- [INPUT] h = instance handle (returned from a previous call to
|
| +- tjInitCompress() or tjInitDecompress()
|
| ++/**
|
| ++ * Allocate an image buffer for use with TurboJPEG. You should always use
|
| ++ * this function to allocate the JPEG destination buffer(s) for #tjCompress2()
|
| ++ * and #tjTransform() unless you are disabling automatic buffer
|
| ++ * (re)allocation (by setting #TJFLAG_NOREALLOC.)
|
| ++ *
|
| ++ * @param bytes the number of bytes to allocate
|
| ++ *
|
| ++ * @return a pointer to a newly-allocated buffer with the specified number of
|
| ++ * bytes
|
| ++ *
|
| ++ * @sa tjFree()
|
| ++ */
|
| ++DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes);
|
| +
|
| +- RETURNS: 0 on success, -1 on error
|
| +-*/
|
| +-DLLEXPORT int DLLCALL tjDestroy(tjhandle h);
|
| +
|
| ++/**
|
| ++ * Free an image buffer previously allocated by TurboJPEG. You should always
|
| ++ * use this function to free JPEG destination buffer(s) that were automatically
|
| ++ * (re)allocated by #tjCompress2() or #tjTransform() or that were manually
|
| ++ * allocated using #tjAlloc().
|
| ++ *
|
| ++ * @param buffer address of the buffer to free
|
| ++ *
|
| ++ * @sa tjAlloc()
|
| ++ */
|
| ++DLLEXPORT void DLLCALL tjFree(unsigned char *buffer);
|
| +
|
| +-/*
|
| +- char *tjGetErrorStr(void)
|
| +-
|
| +- Returns a descriptive error message explaining why the last command failed
|
| +-*/
|
| +
|
| -+.balign 8
|
| -+jsimd_idct_2x2_neon_consts:
|
| -+ .short -FIX_0_720959822 /* v14[0] */
|
| -+ .short FIX_0_850430095 /* v14[1] */
|
| -+ .short -FIX_1_272758580 /* v14[2] */
|
| -+ .short FIX_3_624509785 /* v14[3] */
|
| -+
|
| -+.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
|
| -+ sshll v15.4s, \x4, #15
|
| -+ smull v26.4s, \x6, v14.4h[3]
|
| -+ smlal v26.4s, \x10, v14.4h[2]
|
| -+ smlal v26.4s, \x12, v14.4h[1]
|
| -+ smlal v26.4s, \x16, v14.4h[0]
|
| -+
|
| -+ add v20.4s, v15.4s, v26.4s
|
| -+ sub v15.4s, v15.4s, v26.4s
|
| -+
|
| -+.if \shift > 16
|
| -+ srshr v20.4s, v20.4s, #\shift
|
| -+ srshr v15.4s, v15.4s, #\shift
|
| -+ xtn \y26, v20.4s
|
| -+ xtn \y27, v15.4s
|
| -+.else
|
| -+ rshrn \y26, v20.4s, #\shift
|
| -+ rshrn \y27, v15.4s, #\shift
|
| -+.endif
|
| -+
|
| -+.endm
|
| -+
|
| -+asm_function jsimd_idct_2x2_neon
|
| -+
|
| -+ DCT_TABLE .req x0
|
| -+ COEF_BLOCK .req x1
|
| -+ OUTPUT_BUF .req x2
|
| -+ OUTPUT_COL .req x3
|
| -+ TMP1 .req x0
|
| -+ TMP2 .req x15
|
| -+
|
| -+ /* vpush {v8.4h - v15.4h} ; not available */
|
| -+ sub sp, sp, 208
|
| -+ str x15, [sp], 16
|
| -+
|
| -+ /* Load constants */
|
| -+ adr TMP2, jsimd_idct_2x2_neon_consts
|
| -+ st1 {v4.8b - v7.8b}, [sp], 32
|
| -+ st1 {v8.8b - v11.8b}, [sp], 32
|
| -+ st1 {v12.8b - v15.8b}, [sp], 32
|
| -+ st1 {v16.8b - v19.8b}, [sp], 32
|
| -+ st1 {v21.8b - v22.8b}, [sp], 16
|
| -+ st1 {v24.8b - v27.8b}, [sp], 32
|
| -+ st1 {v30.8b - v31.8b}, [sp], 16
|
| -+ ld1 {v14.4h}, [TMP2]
|
| -+
|
| -+ /* Load all COEF_BLOCK into NEON registers with the following allocation:
|
| -+ * 0 1 2 3 | 4 5 6 7
|
| -+ * ---------+--------
|
| -+ * 0 | v4.4h | v5.4h
|
| -+ * 1 | v6.4h | v7.4h
|
| -+ * 2 | - | -
|
| -+ * 3 | v10.4h | v11.4h
|
| -+ * 4 | - | -
|
| -+ * 5 | v12.4h | v13.4h
|
| -+ * 6 | - | -
|
| -+ * 7 | v16.4h | v17.4h
|
| -+ */
|
| -+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
|
| -+ add COEF_BLOCK, COEF_BLOCK, #16
|
| -+ ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16
|
| -+ add COEF_BLOCK, COEF_BLOCK, #16
|
| -+ ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16
|
| -+ add COEF_BLOCK, COEF_BLOCK, #16
|
| -+ ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16
|
| -+ /* Dequantize */
|
| -+ ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
|
| -+ mul v4.4h, v4.4h, v18.4h
|
| -+ mul v5.4h, v5.4h, v19.4h
|
| -+ ins v4.2d[1], v5.2d[0]
|
| -+ mul v6.4h, v6.4h, v20.4h
|
| -+ mul v7.4h, v7.4h, v21.4h
|
| -+ ins v6.2d[1], v7.2d[0]
|
| -+ add DCT_TABLE, DCT_TABLE, #16
|
| -+ ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16
|
| -+ mul v10.4h, v10.4h, v24.4h
|
| -+ mul v11.4h, v11.4h, v25.4h
|
| -+ ins v10.2d[1], v11.2d[0]
|
| -+ add DCT_TABLE, DCT_TABLE, #16
|
| -+ ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16
|
| -+ mul v12.4h, v12.4h, v26.4h
|
| -+ mul v13.4h, v13.4h, v27.4h
|
| -+ ins v12.2d[1], v13.2d[0]
|
| -+ add DCT_TABLE, DCT_TABLE, #16
|
| -+ ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
|
| -+ mul v16.4h, v16.4h, v30.4h
|
| -+ mul v17.4h, v17.4h, v31.4h
|
| -+ ins v16.2d[1], v17.2d[0]
|
| -+
|
| -+ /* Pass 1 */
|
| -+#if 0
|
| -+ idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h
|
| -+ transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h
|
| -+ idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h
|
| -+ transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h
|
| -+#else
|
| -+ smull v26.4s, v6.4h, v14.4h[3]
|
| -+ smlal v26.4s, v10.4h, v14.4h[2]
|
| -+ smlal v26.4s, v12.4h, v14.4h[1]
|
| -+ smlal v26.4s, v16.4h, v14.4h[0]
|
| -+ smull v24.4s, v7.4h, v14.4h[3]
|
| -+ smlal v24.4s, v11.4h, v14.4h[2]
|
| -+ smlal v24.4s, v13.4h, v14.4h[1]
|
| -+ smlal v24.4s, v17.4h, v14.4h[0]
|
| -+ sshll v15.4s, v4.4h, #15
|
| -+ sshll v30.4s, v5.4h, #15
|
| -+ add v20.4s, v15.4s, v26.4s
|
| -+ sub v15.4s, v15.4s, v26.4s
|
| -+ rshrn v4.4h, v20.4s, #13
|
| -+ rshrn v6.4h, v15.4s, #13
|
| -+ add v20.4s, v30.4s, v24.4s
|
| -+ sub v15.4s, v30.4s, v24.4s
|
| -+ rshrn v5.4h, v20.4s, #13
|
| -+ rshrn v7.4h, v15.4s, #13
|
| -+ ins v4.2d[1], v5.2d[0]
|
| -+ ins v6.2d[1], v7.2d[0]
|
| -+ transpose v4, v6, v3, .16b, .8h
|
| -+ transpose v6, v10, v3, .16b, .4s
|
| -+ ins v11.2d[0], v10.2d[1]
|
| -+ ins v7.2d[0], v6.2d[1]
|
| -+#endif
|
| -+
|
| -+ /* Pass 2 */
|
| -+ idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h
|
| -+
|
| -+ /* Range limit */
|
| -+ movi v30.8h, #0x80
|
| -+ ins v26.2d[1], v27.2d[0]
|
| -+ add v26.8h, v26.8h, v30.8h
|
| -+ sqxtun v30.8b, v26.8h
|
| -+ ins v26.2d[0], v30.2d[0]
|
| -+ sqxtun v27.8b, v26.8h
|
| -+
|
| -+ /* Store results to the output buffer */
|
| -+ ldp TMP1, TMP2, [OUTPUT_BUF]
|
| -+ add TMP1, TMP1, OUTPUT_COL
|
| -+ add TMP2, TMP2, OUTPUT_COL
|
| -+
|
| -+ st1 {v26.b}[0], [TMP1], 1
|
| -+ st1 {v27.b}[4], [TMP1], 1
|
| -+ st1 {v26.b}[1], [TMP2], 1
|
| -+ st1 {v27.b}[5], [TMP2], 1
|
| -+
|
| -+ sub sp, sp, #208
|
| -+ ldr x15, [sp], 16
|
| -+ ld1 {v4.8b - v7.8b}, [sp], 32
|
| -+ ld1 {v8.8b - v11.8b}, [sp], 32
|
| -+ ld1 {v12.8b - v15.8b}, [sp], 32
|
| -+ ld1 {v16.8b - v19.8b}, [sp], 32
|
| -+ ld1 {v21.8b - v22.8b}, [sp], 16
|
| -+ ld1 {v24.8b - v27.8b}, [sp], 32
|
| -+ ld1 {v30.8b - v31.8b}, [sp], 16
|
| -+ blr x30
|
| -+
|
| -+ .unreq DCT_TABLE
|
| -+ .unreq COEF_BLOCK
|
| -+ .unreq OUTPUT_BUF
|
| -+ .unreq OUTPUT_COL
|
| -+ .unreq TMP1
|
| -+ .unreq TMP2
|
| -+
|
| -+.purgem idct_helper
|
| -+
|
| -+
|
| -+/*****************************************************************************/
|
| -+
|
| -+/*
|
| -+ * jsimd_ycc_extrgb_convert_neon
|
| -+ * jsimd_ycc_extbgr_convert_neon
|
| -+ * jsimd_ycc_extrgbx_convert_neon
|
| -+ * jsimd_ycc_extbgrx_convert_neon
|
| -+ * jsimd_ycc_extxbgr_convert_neon
|
| -+ * jsimd_ycc_extxrgb_convert_neon
|
| ++/**
|
| ++ * Returns a descriptive error message explaining why the last command failed.
|
| + *
|
| -+ * Colorspace conversion YCbCr -> RGB
|
| ++ * @return a descriptive error message explaining why the last command failed.
|
| + */
|
| + DLLEXPORT char* DLLCALL tjGetErrorStr(void);
|
| +
|
| +
|
| ++/* Backward compatibility functions and macros (nothing to see here) */
|
| ++#define NUMSUBOPT TJ_NUMSAMP
|
| ++#define TJ_444 TJSAMP_444
|
| ++#define TJ_422 TJSAMP_422
|
| ++#define TJ_420 TJSAMP_420
|
| ++#define TJ_411 TJSAMP_420
|
| ++#define TJ_GRAYSCALE TJSAMP_GRAY
|
| +
|
| -+.macro do_load size
|
| -+ .if \size == 8
|
| -+ ld1 {v4.8b}, [U], 8
|
| -+ ld1 {v5.8b}, [V], 8
|
| -+ ld1 {v0.8b}, [Y], 8
|
| -+ prfm PLDL1KEEP, [U, #64]
|
| -+ prfm PLDL1KEEP, [V, #64]
|
| -+ prfm PLDL1KEEP, [Y, #64]
|
| -+ .elseif \size == 4
|
| -+ ld1 {v4.b}[0], [U], 1
|
| -+ ld1 {v4.b}[1], [U], 1
|
| -+ ld1 {v4.b}[2], [U], 1
|
| -+ ld1 {v4.b}[3], [U], 1
|
| -+ ld1 {v5.b}[0], [V], 1
|
| -+ ld1 {v5.b}[1], [V], 1
|
| -+ ld1 {v5.b}[2], [V], 1
|
| -+ ld1 {v5.b}[3], [V], 1
|
| -+ ld1 {v0.b}[0], [Y], 1
|
| -+ ld1 {v0.b}[1], [Y], 1
|
| -+ ld1 {v0.b}[2], [Y], 1
|
| -+ ld1 {v0.b}[3], [Y], 1
|
| -+ .elseif \size == 2
|
| -+ ld1 {v4.b}[4], [U], 1
|
| -+ ld1 {v4.b}[5], [U], 1
|
| -+ ld1 {v5.b}[4], [V], 1
|
| -+ ld1 {v5.b}[5], [V], 1
|
| -+ ld1 {v0.b}[4], [Y], 1
|
| -+ ld1 {v0.b}[5], [Y], 1
|
| -+ .elseif \size == 1
|
| -+ ld1 {v4.b}[6], [U], 1
|
| -+ ld1 {v5.b}[6], [V], 1
|
| -+ ld1 {v0.b}[6], [Y], 1
|
| -+ .else
|
| -+ .error unsupported macroblock size
|
| -+ .endif
|
| -+.endm
|
| -+
|
| -+.macro do_store bpp, size
|
| -+ .if \bpp == 24
|
| -+ .if \size == 8
|
| -+ st3 {v10.8b, v11.8b, v12.8b}, [RGB], 24
|
| -+ .elseif \size == 4
|
| -+ st3 {v10.b, v11.b, v12.b}[0], [RGB], 3
|
| -+ st3 {v10.b, v11.b, v12.b}[1], [RGB], 3
|
| -+ st3 {v10.b, v11.b, v12.b}[2], [RGB], 3
|
| -+ st3 {v10.b, v11.b, v12.b}[3], [RGB], 3
|
| -+ .elseif \size == 2
|
| -+ st3 {v10.b, v11.b, v12.b}[4], [RGB], 3
|
| -+ st3 {v10.b, v11.b, v12.b}[5], [RGB], 3
|
| -+ .elseif \size == 1
|
| -+ st3 {v10.b, v11.b, v12.b}[6], [RGB], 3
|
| -+ .else
|
| -+ .error unsupported macroblock size
|
| -+ .endif
|
| -+ .elseif \bpp == 32
|
| -+ .if \size == 8
|
| -+ st4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32
|
| -+ .elseif \size == 4
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4
|
| -+ .elseif \size == 2
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4
|
| -+ .elseif \size == 1
|
| -+ st4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4
|
| -+ .else
|
| -+ .error unsupported macroblock size
|
| -+ .endif
|
| -+ .elseif \bpp==16
|
| -+ .if \size == 8
|
| -+ st1 {v25.8h}, [RGB],16
|
| -+ .elseif \size == 4
|
| -+ st1 {v25.4h}, [RGB],8
|
| -+ .elseif \size == 2
|
| -+ st1 {v25.h}[4], [RGB],2
|
| -+ st1 {v25.h}[5], [RGB],2
|
| -+ .elseif \size == 1
|
| -+ st1 {v25.h}[6], [RGB],2
|
| -+ .else
|
| -+ .error unsupported macroblock size
|
| -+ .endif
|
| -+ .else
|
| -+ .error unsupported bpp
|
| -+ .endif
|
| -+.endm
|
| -+
|
| -+.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize, defsize
|
| ++#define TJ_BGR 1
|
| ++#define TJ_BOTTOMUP TJFLAG_BOTTOMUP
|
| ++#define TJ_FORCEMMX TJFLAG_FORCEMMX
|
| ++#define TJ_FORCESSE TJFLAG_FORCESSE
|
| ++#define TJ_FORCESSE2 TJFLAG_FORCESSE2
|
| ++#define TJ_ALPHAFIRST 64
|
| ++#define TJ_FORCESSE3 TJFLAG_FORCESSE3
|
| ++#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE
|
| ++#define TJ_YUV 512
|
| +
|
| -+/*
|
| -+ * 2-stage pipelined YCbCr->RGB conversion
|
| -+ */
|
| ++DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height);
|
| ++
|
| ++DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
|
| ++ int jpegSubsamp);
|
| +
|
| -+.macro do_yuv_to_rgb_stage1
|
| -+ uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */
|
| -+ uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
| -+ smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
|
| -+ smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
|
| -+ smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
|
| -+ smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
|
| -+ smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
|
| -+ smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
|
| -+ smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */
|
| -+ smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */
|
| -+.endm
|
| -+
|
| -+.macro do_yuv_to_rgb_stage2
|
| -+ rshrn v20.4h, v20.4s, #15
|
| -+ rshrn2 v20.8h, v22.4s, #15
|
| -+ rshrn v24.4h, v24.4s, #14
|
| -+ rshrn2 v24.8h, v26.4s, #14
|
| -+ rshrn v28.4h, v28.4s, #14
|
| -+ rshrn2 v28.8h, v30.4s, #14
|
| -+ uaddw v20.8h, v20.8h, v0.8b
|
| -+ uaddw v24.8h, v24.8h, v0.8b
|
| -+ uaddw v28.8h, v28.8h, v0.8b
|
| -+.if \bpp != 16
|
| -+ sqxtun v1\g_offs\defsize, v20.8h
|
| -+ sqxtun v1\r_offs\defsize, v24.8h
|
| -+ sqxtun v1\b_offs\defsize, v28.8h
|
| -+.else
|
| -+ sqshlu v21.8h, v20.8h, #8
|
| -+ sqshlu v25.8h, v24.8h, #8
|
| -+ sqshlu v29.8h, v28.8h, #8
|
| -+ sri v25.8h, v21.8h, #5
|
| -+ sri v25.8h, v29.8h, #11
|
| -+.endif
|
| -+
|
| -+.endm
|
| -+
|
| -+.macro do_yuv_to_rgb_stage2_store_load_stage1
|
| -+ rshrn v20.4h, v20.4s, #15
|
| -+ rshrn v24.4h, v24.4s, #14
|
| -+ rshrn v28.4h, v28.4s, #14
|
| -+ ld1 {v4.8b}, [U], 8
|
| -+ rshrn2 v20.8h, v22.4s, #15
|
| -+ rshrn2 v24.8h, v26.4s, #14
|
| -+ rshrn2 v28.8h, v30.4s, #14
|
| -+ ld1 {v5.8b}, [V], 8
|
| -+ uaddw v20.8h, v20.8h, v0.8b
|
| -+ uaddw v24.8h, v24.8h, v0.8b
|
| -+ uaddw v28.8h, v28.8h, v0.8b
|
| -+.if \bpp != 16 /**************** rgb24/rgb32 *********************************/
|
| -+ sqxtun v1\g_offs\defsize, v20.8h
|
| -+ ld1 {v0.8b}, [Y], 8
|
| -+ sqxtun v1\r_offs\defsize, v24.8h
|
| -+ prfm PLDL1KEEP, [U, #64]
|
| -+ prfm PLDL1KEEP, [V, #64]
|
| -+ prfm PLDL1KEEP, [Y, #64]
|
| -+ sqxtun v1\b_offs\defsize, v28.8h
|
| -+ uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */
|
| -+ uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
| -+ smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
|
| -+ smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
|
| -+ smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
|
| -+ smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
|
| -+ smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
|
| -+ smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
|
| -+.else /**************************** rgb565 ***********************************/
|
| -+ sqshlu v21.8h, v20.8h, #8
|
| -+ sqshlu v25.8h, v24.8h, #8
|
| -+ sqshlu v29.8h, v28.8h, #8
|
| -+ uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */
|
| -+ uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
| -+ ld1 {v0.8b}, [Y], 8
|
| -+ smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
|
| -+ smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
|
| -+ smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
|
| -+ smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
|
| -+ sri v25.8h, v21.8h, #5
|
| -+ smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
|
| -+ smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
|
| -+ prfm PLDL1KEEP, [U, #64]
|
| -+ prfm PLDL1KEEP, [V, #64]
|
| -+ prfm PLDL1KEEP, [Y, #64]
|
| -+ sri v25.8h, v29.8h, #11
|
| -+.endif
|
| -+ do_store \bpp, 8
|
| -+ smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */
|
| -+ smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */
|
| -+.endm
|
| -+
|
| -+.macro do_yuv_to_rgb
|
| -+ do_yuv_to_rgb_stage1
|
| -+ do_yuv_to_rgb_stage2
|
| -+.endm
|
| -+
|
| -+/* Apple gas crashes on adrl, work around that by using adr.
|
| -+ * But this requires a copy of these constants for each function.
|
| ++DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf,
|
| ++ int width, int pitch, int height, int pixelSize, unsigned char *dstBuf,
|
| ++ unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags);
|
| ++
|
| ++DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle,
|
| ++ unsigned char *srcBuf, int width, int pitch, int height, int pixelSize,
|
| ++ unsigned char *dstBuf, int subsamp, int flags);
|
| ++
|
| ++DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle,
|
| ++ unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height);
|
| ++
|
| ++DLLEXPORT int DLLCALL tjDecompress(tjhandle handle,
|
| ++ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
|
| ++ int width, int pitch, int height, int pixelSize, int flags);
|
| ++
|
| ++
|
| ++/**
|
| ++ * @}
|
| + */
|
| +
|
| -+.balign 16
|
| -+jsimd_ycc_\colorid\()_neon_consts:
|
| -+ .short 0, 0, 0, 0
|
| -+ .short 22971, -11277, -23401, 29033
|
| -+ .short -128, -128, -128, -128
|
| -+ .short -128, -128, -128, -128
|
| -+
|
| -+asm_function jsimd_ycc_\colorid\()_convert_neon
|
| -+ OUTPUT_WIDTH .req x0
|
| -+ INPUT_BUF .req x1
|
| -+ INPUT_ROW .req x2
|
| -+ OUTPUT_BUF .req x3
|
| -+ NUM_ROWS .req x4
|
| -+
|
| -+ INPUT_BUF0 .req x5
|
| -+ INPUT_BUF1 .req x6
|
| -+ INPUT_BUF2 .req INPUT_BUF
|
| -+
|
| -+ RGB .req x7
|
| -+ Y .req x8
|
| -+ U .req x9
|
| -+ V .req x10
|
| -+ N .req x15
|
| -+
|
| -+ sub sp, sp, 336
|
| -+ str x15, [sp], 16
|
| -+ /* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
|
| -+ adr x15, jsimd_ycc_\colorid\()_neon_consts
|
| -+ /* Save NEON registers */
|
| -+ st1 {v0.8b - v3.8b}, [sp], 32
|
| -+ st1 {v4.8b - v7.8b}, [sp], 32
|
| -+ st1 {v8.8b - v11.8b}, [sp], 32
|
| -+ st1 {v12.8b - v15.8b}, [sp], 32
|
| -+ st1 {v16.8b - v19.8b}, [sp], 32
|
| -+ st1 {v20.8b - v23.8b}, [sp], 32
|
| -+ st1 {v24.8b - v27.8b}, [sp], 32
|
| -+ st1 {v28.8b - v31.8b}, [sp], 32
|
| -+ ld1 {v0.4h, v1.4h}, [x15], 16
|
| -+ ld1 {v2.8h}, [x15]
|
| -+
|
| -+ /* Save ARM registers and handle input arguments */
|
| -+ /* push {x4, x5, x6, x7, x8, x9, x10, x30} */
|
| -+ stp x4, x5, [sp], 16
|
| -+ stp x6, x7, [sp], 16
|
| -+ stp x8, x9, [sp], 16
|
| -+ stp x10, x30, [sp], 16
|
| -+ ldr INPUT_BUF0, [INPUT_BUF]
|
| -+ ldr INPUT_BUF1, [INPUT_BUF, 8]
|
| -+ ldr INPUT_BUF2, [INPUT_BUF, 16]
|
| -+ .unreq INPUT_BUF
|
| -+
|
| -+ /* Initially set v10, v11.4h, v12.8b, d13 to 0xFF */
|
| -+ movi v10.16b, #255
|
| -+ movi v13.16b, #255
|
| -+
|
| -+ /* Outer loop over scanlines */
|
| -+ cmp NUM_ROWS, #1
|
| -+ blt 9f
|
| -+0:
|
| -+ lsl x16, INPUT_ROW, #3
|
| -+ ldr Y, [INPUT_BUF0, x16]
|
| -+ ldr U, [INPUT_BUF1, x16]
|
| -+ mov N, OUTPUT_WIDTH
|
| -+ ldr V, [INPUT_BUF2, x16]
|
| -+ add INPUT_ROW, INPUT_ROW, #1
|
| -+ ldr RGB, [OUTPUT_BUF], #8
|
| -+
|
| -+ /* Inner loop over pixels */
|
| -+ subs N, N, #8
|
| -+ blt 3f
|
| -+ do_load 8
|
| -+ do_yuv_to_rgb_stage1
|
| -+ subs N, N, #8
|
| -+ blt 2f
|
| -+1:
|
| -+ do_yuv_to_rgb_stage2_store_load_stage1
|
| -+ subs N, N, #8
|
| -+ bge 1b
|
| -+2:
|
| -+ do_yuv_to_rgb_stage2
|
| -+ do_store \bpp, 8
|
| -+ tst N, #7
|
| -+ beq 8f
|
| -+3:
|
| -+ tst N, #4
|
| -+ beq 3f
|
| -+ do_load 4
|
| -+3:
|
| -+ tst N, #2
|
| -+ beq 4f
|
| -+ do_load 2
|
| -+4:
|
| -+ tst N, #1
|
| -+ beq 5f
|
| -+ do_load 1
|
| -+5:
|
| -+ do_yuv_to_rgb
|
| -+ tst N, #4
|
| -+ beq 6f
|
| -+ do_store \bpp, 4
|
| -+6:
|
| -+ tst N, #2
|
| -+ beq 7f
|
| -+ do_store \bpp, 2
|
| -+7:
|
| -+ tst N, #1
|
| -+ beq 8f
|
| -+ do_store \bpp, 1
|
| -+8:
|
| -+ subs NUM_ROWS, NUM_ROWS, #1
|
| -+ bgt 0b
|
| -+9:
|
| -+ /* Restore all registers and return */
|
| -+ sub sp, sp, #336
|
| -+ ldr x15, [sp], 16
|
| -+ ld1 {v0.8b - v3.8b}, [sp], 32
|
| -+ ld1 {v4.8b - v7.8b}, [sp], 32
|
| -+ ld1 {v8.8b - v11.8b}, [sp], 32
|
| -+ ld1 {v12.8b - v15.8b}, [sp], 32
|
| -+ ld1 {v16.8b - v19.8b}, [sp], 32
|
| -+ ld1 {v20.8b - v23.8b}, [sp], 32
|
| -+ ld1 {v24.8b - v27.8b}, [sp], 32
|
| -+ ld1 {v28.8b - v31.8b}, [sp], 32
|
| -+ /* pop {r4, r5, r6, r7, r8, r9, r10, pc} */
|
| -+ ldp x4, x5, [sp], 16
|
| -+ ldp x6, x7, [sp], 16
|
| -+ ldp x8, x9, [sp], 16
|
| -+ ldp x10, x30, [sp], 16
|
| -+ br x30
|
| -+ .unreq OUTPUT_WIDTH
|
| -+ .unreq INPUT_ROW
|
| -+ .unreq OUTPUT_BUF
|
| -+ .unreq NUM_ROWS
|
| -+ .unreq INPUT_BUF0
|
| -+ .unreq INPUT_BUF1
|
| -+ .unreq INPUT_BUF2
|
| -+ .unreq RGB
|
| -+ .unreq Y
|
| -+ .unreq U
|
| -+ .unreq V
|
| -+ .unreq N
|
| -+
|
| -+.purgem do_yuv_to_rgb
|
| -+.purgem do_yuv_to_rgb_stage1
|
| -+.purgem do_yuv_to_rgb_stage2
|
| -+.purgem do_yuv_to_rgb_stage2_store_load_stage1
|
| -+.endm
|
| -+
|
| -+/*--------------------------------- id ----- bpp R rsize G gsize B bsize defsize */
|
| -+generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b
|
| -+generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b
|
| -+generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h, .8b
|
| -+generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h, .8b
|
| -+generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h, .8b
|
| -+generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h, .8b
|
| -+generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, .4h, 0, .4h, 0, .4h, .8b
|
| -+.purgem do_load
|
| -+.purgem do_store
|
| + #ifdef __cplusplus
|
| + }
|
| + #endif
|
| ++
|
| ++#endif
|
| +Index: turbojpegl.c
|
| +===================================================================
|
| +--- turbojpegl.c (revision 829)
|
| ++++ turbojpegl.c (working copy)
|
| +@@ -149,6 +149,10 @@
|
| + #error "TurboJPEG requires JPEG colorspace extensions"
|
| + #endif
|
| +
|
| ++ if(flags&TJ_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
|
| ++ else if(flags&TJ_FORCESSE) putenv("JSIMD_FORCESSE=1");
|
| ++ else if(flags&TJ_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
|
| ++
|
| + if(setjmp(j->jerr.jb))
|
| + { // this will execute if LIBJPEG has an error
|
| + if(row_pointer) free(row_pointer);
|
| +@@ -188,7 +192,8 @@
|
| + j->cinfo.image_height-j->cinfo.next_scanline);
|
| + }
|
| + jpeg_finish_compress(&j->cinfo);
|
| +- *size=TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height)-(j->jdms.free_in_buffer);
|
| ++ *size=TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height)
|
| ++ -(unsigned long)(j->jdms.free_in_buffer);
|
| +
|
| + if(row_pointer) free(row_pointer);
|
| + return 0;
|
| +@@ -287,6 +292,10 @@
|
| +
|
| + if(pitch==0) pitch=width*ps;
|
| +
|
| ++ if(flags&TJ_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
|
| ++ else if(flags&TJ_FORCESSE) putenv("JSIMD_FORCESSE=1");
|
| ++ else if(flags&TJ_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
|
| ++
|
| + if(setjmp(j->jerr.jb))
|
| + { // this will execute if LIBJPEG has an error
|
| + if(row_pointer) free(row_pointer);
|
| +Index: wrppm.c
|
| +===================================================================
|
| +--- wrppm.c (revision 829)
|
| ++++ wrppm.c (working copy)
|
| +@@ -2,6 +2,7 @@
|
| + * wrppm.c
|
| + *
|
| + * Copyright (C) 1991-1996, Thomas G. Lane.
|
| ++ * Modified 2009 by Guido Vollbeding.
|
| + * This file is part of the Independent JPEG Group's software.
|
| + * For conditions of distribution and use, see the accompanying README file.
|
| + *
|
| +@@ -40,11 +41,11 @@
|
| + #define BYTESPERSAMPLE 1
|
| + #define PPM_MAXVAL 255
|
| + #else
|
| +-/* The word-per-sample format always puts the LSB first. */
|
| ++/* The word-per-sample format always puts the MSB first. */
|
| + #define PUTPPMSAMPLE(ptr,v) \
|
| + { register int val_ = v; \
|
| ++ *ptr++ = (char) ((val_ >> 8) & 0xFF); \
|
| + *ptr++ = (char) (val_ & 0xFF); \
|
| +- *ptr++ = (char) ((val_ >> 8) & 0xFF); \
|
| + }
|
| + #define BYTESPERSAMPLE 2
|
| + #define PPM_MAXVAL ((1<<BITS_IN_JSAMPLE)-1)
|
|
|