OLD | NEW |
1 // Copyright 2007 Alan Donovan. All rights reserved. | 1 // Copyright 2015 The Bazel Authors. All rights reserved. |
2 // | 2 // |
3 // Author: Alan Donovan <adonovan@google.com> | |
4 // | |
5 // Licensed under the Apache License, Version 2.0 (the "License"); | 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
6 // you may not use this file except in compliance with the License. | 4 // you may not use this file except in compliance with the License. |
7 // You may obtain a copy of the License at | 5 // You may obtain a copy of the License at |
8 // | 6 // |
9 // http://www.apache.org/licenses/LICENSE-2.0 | 7 // http://www.apache.org/licenses/LICENSE-2.0 |
10 // | 8 // |
11 // Unless required by applicable law or agreed to in writing, software | 9 // Unless required by applicable law or agreed to in writing, software |
12 // distributed under the License is distributed on an "AS IS" BASIS, | 10 // distributed under the License is distributed on an "AS IS" BASIS, |
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 // See the License for the specific language governing permissions and | 12 // See the License for the specific language governing permissions and |
(...skipping 16 matching lines...) Expand all Loading... |
31 #include <stdarg.h> | 29 #include <stdarg.h> |
32 #include <stdio.h> | 30 #include <stdio.h> |
33 #include <stdlib.h> | 31 #include <stdlib.h> |
34 #include <string.h> | 32 #include <string.h> |
35 #include <sys/mman.h> | 33 #include <sys/mman.h> |
36 #include <unistd.h> | 34 #include <unistd.h> |
37 #include <limits.h> | 35 #include <limits.h> |
38 #include <limits> | 36 #include <limits> |
39 #include <vector> | 37 #include <vector> |
40 | 38 |
| 39 #include "third_party/ijar/mapped_file.h" |
41 #include "third_party/ijar/zip.h" | 40 #include "third_party/ijar/zip.h" |
42 #include <zlib.h> | 41 #include <zlib.h> |
43 | 42 |
44 #define LOCAL_FILE_HEADER_SIGNATURE 0x04034b50 | 43 #define LOCAL_FILE_HEADER_SIGNATURE 0x04034b50 |
45 #define CENTRAL_FILE_HEADER_SIGNATURE 0x02014b50 | 44 #define CENTRAL_FILE_HEADER_SIGNATURE 0x02014b50 |
46 #define END_OF_CENTRAL_DIR_SIGNATURE 0x06054b50 | 45 #define DIGITAL_SIGNATURE 0x05054b50 |
47 #define DATA_DESCRIPTOR_SIGNATURE 0x08074b50 | 46 #define ZIP64_EOCD_SIGNATURE 0x06064b50 |
| 47 #define ZIP64_EOCD_LOCATOR_SIGNATURE 0x07064b50 |
| 48 #define EOCD_SIGNATURE 0x06054b50 |
| 49 #define DATA_DESCRIPTOR_SIGNATURE 0x08074b50 |
| 50 |
| 51 #define U2_MAX 0xffff |
| 52 #define U4_MAX 0xffffffffUL |
| 53 |
| 54 #define ZIP64_EOCD_LOCATOR_SIZE 20 |
| 55 // zip64 eocd is fixed size in the absence of a zip64 extensible data sector |
| 56 #define ZIP64_EOCD_FIXED_SIZE 56 |
48 | 57 |
49 // version to extract: 1.0 - default value from APPNOTE.TXT. | 58 // version to extract: 1.0 - default value from APPNOTE.TXT. |
50 // Output JAR files contain no extra ZIP features, so this is enough. | 59 // Output JAR files contain no extra ZIP features, so this is enough. |
51 #define ZIP_VERSION_TO_EXTRACT 10 | 60 #define ZIP_VERSION_TO_EXTRACT 10 |
52 #define COMPRESSION_METHOD_STORED 0 // no compression | 61 #define COMPRESSION_METHOD_STORED 0 // no compression |
53 #define COMPRESSION_METHOD_DEFLATED 8 | 62 #define COMPRESSION_METHOD_DEFLATED 8 |
54 | 63 |
55 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3) | 64 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3) |
56 #define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11) | 65 #define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11) |
57 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1)) | 66 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1)) |
58 #define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \ | 67 #define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \ |
59 (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \ | 68 (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \ |
60 | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \ | 69 | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \ |
61 | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED) | 70 | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED) |
62 | 71 |
63 namespace devtools_ijar { | 72 namespace devtools_ijar { |
64 // In the absence of ZIP64 support, zip files are limited to 4GB. | 73 // In the absence of ZIP64 support, zip files are limited to 4GB. |
65 // http://www.info-zip.org/FAQ.html#limits | 74 // http://www.info-zip.org/FAQ.html#limits |
66 static const u8 kMaximumOutputSize = std::numeric_limits<uint32_t>::max(); | 75 static const u8 kMaximumOutputSize = std::numeric_limits<uint32_t>::max(); |
67 | 76 |
68 static bool ProcessCentralDirEntry(const u1 *&p, | |
69 size_t *compressed_size, | |
70 size_t *uncompressed_size, | |
71 char *filename, | |
72 size_t filename_size, | |
73 u4 *attr, | |
74 u4 *offset); | |
75 | |
76 // | 77 // |
77 // A class representing a ZipFile for reading. Its public API is exposed | 78 // A class representing a ZipFile for reading. Its public API is exposed |
78 // using the ZipExtractor abstract class. | 79 // using the ZipExtractor abstract class. |
79 // | 80 // |
80 class InputZipFile : public ZipExtractor { | 81 class InputZipFile : public ZipExtractor { |
81 public: | 82 public: |
82 InputZipFile(ZipExtractorProcessor *processor, int fd, off_t in_length, | 83 InputZipFile(ZipExtractorProcessor *processor, const char* filename); |
83 off_t in_offset, const u1* zipdata_in, const u1* central_dir); | |
84 virtual ~InputZipFile(); | 84 virtual ~InputZipFile(); |
85 | 85 |
86 virtual const char* GetError() { | 86 virtual const char* GetError() { |
87 if (errmsg[0] == 0) { | 87 if (errmsg[0] == 0) { |
88 return NULL; | 88 return NULL; |
89 } | 89 } |
90 return errmsg; | 90 return errmsg; |
91 } | 91 } |
92 | 92 |
| 93 bool Open(); |
93 virtual bool ProcessNext(); | 94 virtual bool ProcessNext(); |
94 virtual void Reset(); | 95 virtual void Reset(); |
95 virtual size_t GetSize() { | 96 virtual size_t GetSize() { |
96 return in_length_; | 97 return input_file_->Length(); |
97 } | 98 } |
98 | 99 |
99 virtual u8 CalculateOutputLength(); | 100 virtual u8 CalculateOutputLength(); |
100 | 101 |
| 102 virtual bool ProcessCentralDirEntry(const u1 *&p, size_t *compressed_size, |
| 103 size_t *uncompressed_size, char *filename, |
| 104 size_t filename_size, u4 *attr, |
| 105 u4 *offset); |
| 106 |
101 private: | 107 private: |
102 ZipExtractorProcessor *processor; | 108 ZipExtractorProcessor *processor; |
103 | 109 const char* filename_; |
104 int fd_in; // Input file descripor | 110 MappedInputFile *input_file_; |
105 | 111 |
106 // InputZipFile is responsible for maintaining the following | 112 // InputZipFile is responsible for maintaining the following |
107 // pointers. They are allocated by the Create() method before | 113 // pointers. They are allocated by the Create() method before |
108 // the object is actually created using mmap. | 114 // the object is actually created using mmap. |
109 const u1 * const zipdata_in_; // start of input file mmap | 115 const u1 * zipdata_in_; // start of input file mmap |
110 const u1 * zipdata_in_mapped_; // start of still mapped region | 116 size_t bytes_unmapped_; // bytes that have already been unmapped |
111 const u1 * const central_dir_; // central directory in input file | 117 const u1 * central_dir_; // central directory in input file |
112 | 118 |
113 size_t in_length_; // size of the input file | |
114 size_t in_offset_; // offset the input file | 119 size_t in_offset_; // offset the input file |
115 | 120 |
116 const u1 *p; // input cursor | 121 const u1 *p; // input cursor |
117 | 122 |
118 const u1* central_dir_current_; // central dir input cursor | 123 const u1* central_dir_current_; // central dir input cursor |
119 | 124 |
120 // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every | 125 // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every |
121 // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is | 126 // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is |
122 // not enough, we bail out. We only decompress class files, so they should | 127 // not enough, we bail out. We only decompress class files, so they should |
123 // be smaller than 64K anyway, but we give a little leeway. | 128 // be smaller than 64K anyway, but we give a little leeway. |
124 // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the | 129 // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the |
125 // ZIP. It is set to 128M here so we can uncompress the Bazel server with | 130 // ZIP. It is set to 2GB here because no one has audited the code for 64-bit |
126 // this library. | 131 // cleanliness. |
127 static const size_t INITIAL_BUFFER_SIZE = 256 * 1024; // 256K | 132 static const size_t INITIAL_BUFFER_SIZE = 256 * 1024; // 256K |
128 static const size_t MAX_BUFFER_SIZE = 128 * 1024 * 1024; | 133 static const size_t MAX_BUFFER_SIZE = std::numeric_limits<int32_t>::max(); |
129 static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024; | 134 static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024; |
130 | 135 |
131 // These metadata fields are the fields of the ZIP header of the file being | 136 // These metadata fields are the fields of the ZIP header of the file being |
132 // processed. | 137 // processed. |
133 u2 extract_version_; | 138 u2 extract_version_; |
134 u2 general_purpose_bit_flag_; | 139 u2 general_purpose_bit_flag_; |
135 u2 compression_method_; | 140 u2 compression_method_; |
136 u4 uncompressed_size_; | 141 u4 uncompressed_size_; |
137 u4 compressed_size_; | 142 u4 compressed_size_; |
138 u2 file_name_length_; | 143 u2 file_name_length_; |
(...skipping 22 matching lines...) Expand all Loading... |
161 vsnprintf(errmsg, 4*PATH_MAX, fmt, ap); | 166 vsnprintf(errmsg, 4*PATH_MAX, fmt, ap); |
162 va_end(ap); | 167 va_end(ap); |
163 return -1; | 168 return -1; |
164 } | 169 } |
165 | 170 |
166 // Check that at least n bytes remain in the input file, otherwise | 171 // Check that at least n bytes remain in the input file, otherwise |
167 // abort with an error message. "state" is the name of the field | 172 // abort with an error message. "state" is the name of the field |
168 // we're about to read, for diagnostics. | 173 // we're about to read, for diagnostics. |
169 int EnsureRemaining(size_t n, const char *state) { | 174 int EnsureRemaining(size_t n, const char *state) { |
170 size_t in_offset = p - zipdata_in_; | 175 size_t in_offset = p - zipdata_in_; |
171 size_t remaining = in_length_ - in_offset; | 176 size_t remaining = input_file_->Length() - in_offset; |
172 if (n > remaining) { | 177 if (n > remaining) { |
173 return error("Premature end of file (at offset %zd, state=%s); " | 178 return error("Premature end of file (at offset %zd, state=%s); " |
174 "expected %zd more bytes but found %zd.\n", | 179 "expected %zd more bytes but found %zd.\n", |
175 in_offset, state, n, remaining); | 180 in_offset, state, n, remaining); |
176 } | 181 } |
177 return 0; | 182 return 0; |
178 } | 183 } |
179 | 184 |
180 // Read one entry from input zip file | 185 // Read one entry from input zip file |
181 int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size); | 186 int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size); |
182 | 187 |
183 // Uncompress a file from the archive using zlib. The pointer returned | 188 // Uncompress a file from the archive using zlib. The pointer returned |
184 // is owned by InputZipFile, so it must not be freed. Advances the input | 189 // is owned by InputZipFile, so it must not be freed. Advances the input |
185 // cursor to the first byte after the compressed data. | 190 // cursor to the first byte after the compressed data. |
186 u1* UncompressFile(); | 191 u1* UncompressFile(); |
187 | 192 |
188 // Skip a file | 193 // Skip a file |
189 int SkipFile(const bool compressed); | 194 int SkipFile(const bool compressed); |
190 | 195 |
191 // Process a file | 196 // Process a file |
192 int ProcessFile(const bool compressed); | 197 int ProcessFile(const bool compressed); |
193 }; | 198 }; |
194 | 199 |
195 // | 200 // |
196 // A class implementing ZipBuilder that represents an open zip file for writing. | 201 // A class implementing ZipBuilder that represents an open zip file for writing. |
197 // | 202 // |
198 class OutputZipFile : public ZipBuilder { | 203 class OutputZipFile : public ZipBuilder { |
199 public: | 204 public: |
200 OutputZipFile(int fd, u1 * const zipdata_out) : | 205 OutputZipFile(const char* filename, u8 estimated_size) : |
201 fd_out(fd), | 206 output_file_(NULL), |
202 zipdata_out_(zipdata_out), | 207 filename_(filename), |
203 q(zipdata_out) { | 208 estimated_size_(estimated_size), |
| 209 finished_(false) { |
204 errmsg[0] = 0; | 210 errmsg[0] = 0; |
205 } | 211 } |
206 | 212 |
207 virtual const char* GetError() { | 213 virtual const char* GetError() { |
208 if (errmsg[0] == 0) { | 214 if (errmsg[0] == 0) { |
209 return NULL; | 215 return NULL; |
210 } | 216 } |
211 return errmsg; | 217 return errmsg; |
212 } | 218 } |
213 | 219 |
214 virtual ~OutputZipFile() { Finish(); } | 220 virtual ~OutputZipFile() { Finish(); } |
215 virtual u1* NewFile(const char* filename, const u4 attr); | 221 virtual u1* NewFile(const char* filename, const u4 attr); |
216 virtual int FinishFile(size_t filelength, bool compress = false); | 222 virtual int FinishFile(size_t filelength, bool compress = false, |
| 223 bool compute_crc = false); |
217 virtual int WriteEmptyFile(const char *filename); | 224 virtual int WriteEmptyFile(const char *filename); |
218 virtual size_t GetSize() { | 225 virtual size_t GetSize() { |
219 return Offset(q); | 226 return Offset(q); |
220 } | 227 } |
221 virtual int GetNumberFiles() { | 228 virtual int GetNumberFiles() { |
222 return entries_.size(); | 229 return entries_.size(); |
223 } | 230 } |
224 virtual int Finish(); | 231 virtual int Finish(); |
| 232 bool Open(); |
225 | 233 |
226 private: | 234 private: |
227 struct LocalFileEntry { | 235 struct LocalFileEntry { |
228 // Start of the local header (in the output buffer). | 236 // Start of the local header (in the output buffer). |
229 size_t local_header_offset; | 237 size_t local_header_offset; |
230 | 238 |
231 // Sizes of the file entry | 239 // Sizes of the file entry |
232 size_t uncompressed_length; | 240 size_t uncompressed_length; |
233 size_t compressed_length; | 241 size_t compressed_length; |
234 | 242 |
235 // Compression method | 243 // Compression method |
236 u2 compression_method; | 244 u2 compression_method; |
237 | 245 |
| 246 // CRC32 |
| 247 u4 crc32; |
| 248 |
238 // external attributes field | 249 // external attributes field |
239 u4 external_attr; | 250 u4 external_attr; |
240 | 251 |
241 // Start/length of the file_name in the local header. | 252 // Start/length of the file_name in the local header. |
242 u1 *file_name; | 253 u1 *file_name; |
243 u2 file_name_length; | 254 u2 file_name_length; |
244 | 255 |
245 // Start/length of the extra_field in the local header. | 256 // Start/length of the extra_field in the local header. |
246 const u1 *extra_field; | 257 const u1 *extra_field; |
247 u2 extra_field_length; | 258 u2 extra_field_length; |
248 }; | 259 }; |
249 | 260 |
250 int fd_out; // file descriptor for the output file | 261 MappedOutputFile* output_file_; |
| 262 const char* filename_; |
| 263 u8 estimated_size_; |
| 264 bool finished_; |
251 | 265 |
252 // OutputZipFile is responsible for maintaining the following | 266 // OutputZipFile is responsible for maintaining the following |
253 // pointers. They are allocated by the Create() method before | 267 // pointers. They are allocated by the Create() method before |
254 // the object is actually created using mmap. | 268 // the object is actually created using mmap. |
255 u1 * const zipdata_out_; // start of output file mmap | 269 u1 *zipdata_out_; // start of output file mmap |
256 u1 *q; // output cursor | 270 u1 *q; // output cursor |
257 | 271 |
258 u1 *header_ptr; // Current pointer to "compression method" entry. | 272 u1 *header_ptr; // Current pointer to "compression method" entry. |
259 | 273 |
260 // List of entries to write the central directory | 274 // List of entries to write the central directory |
261 std::vector<LocalFileEntry*> entries_; | 275 std::vector<LocalFileEntry*> entries_; |
262 | 276 |
263 // last error | 277 // last error |
264 char errmsg[4*PATH_MAX]; | 278 char errmsg[4*PATH_MAX]; |
265 | 279 |
(...skipping 16 matching lines...) Expand all Loading... |
282 } | 296 } |
283 | 297 |
284 // Write ZIP file header in the output. Since the compressed size is not | 298 // Write ZIP file header in the output. Since the compressed size is not |
285 // known in advance, it must be recorded later. This method returns a pointer | 299 // known in advance, it must be recorded later. This method returns a pointer |
286 // to "compressed size" in the file header that should be passed to | 300 // to "compressed size" in the file header that should be passed to |
287 // WriteFileSizeInLocalFileHeader() later. | 301 // WriteFileSizeInLocalFileHeader() later. |
288 u1* WriteLocalFileHeader(const char *filename, const u4 attr); | 302 u1* WriteLocalFileHeader(const char *filename, const u4 attr); |
289 | 303 |
290 // Fill in the "compressed size" and "uncompressed size" fields in a local | 304 // Fill in the "compressed size" and "uncompressed size" fields in a local |
291 // file header previously written by WriteLocalFileHeader(). | 305 // file header previously written by WriteLocalFileHeader(). |
292 size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr, size_t out_length, | 306 size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr, |
293 bool compress = false); | 307 size_t out_length, |
| 308 bool compress = false, |
| 309 const u4 crc = 0); |
294 }; | 310 }; |
295 | 311 |
296 // | 312 // |
297 // Implementation of InputZipFile | 313 // Implementation of InputZipFile |
298 // | 314 // |
299 bool InputZipFile::ProcessNext() { | 315 bool InputZipFile::ProcessNext() { |
300 // Process the next entry in the central directory. Also make sure that the | 316 // Process the next entry in the central directory. Also make sure that the |
301 // content pointer is in sync. | 317 // content pointer is in sync. |
302 size_t compressed, uncompressed; | 318 size_t compressed, uncompressed; |
303 u4 offset; | 319 u4 offset; |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
406 // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip | 422 // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip |
407 // the next 8 bytes (because the value just read was the CRC). | 423 // the next 8 bytes (because the value just read was the CRC). |
408 u4 signature = get_u4le(p); | 424 u4 signature = get_u4le(p); |
409 if (signature == DATA_DESCRIPTOR_SIGNATURE) { | 425 if (signature == DATA_DESCRIPTOR_SIGNATURE) { |
410 p += 4 * 3; | 426 p += 4 * 3; |
411 } else { | 427 } else { |
412 p += 4 * 2; | 428 p += 4 * 2; |
413 } | 429 } |
414 } | 430 } |
415 | 431 |
416 if (p > zipdata_in_mapped_ + MAX_MAPPED_REGION) { | 432 size_t bytes_processed = p - zipdata_in_; |
417 munmap(const_cast<u1 *>(zipdata_in_mapped_), MAX_MAPPED_REGION); | 433 if (bytes_processed > bytes_unmapped_ + MAX_MAPPED_REGION) { |
418 zipdata_in_mapped_ += MAX_MAPPED_REGION; | 434 input_file_->Discard(MAX_MAPPED_REGION); |
| 435 bytes_unmapped_ += MAX_MAPPED_REGION; |
419 } | 436 } |
420 | 437 |
421 return 0; | 438 return 0; |
422 } | 439 } |
423 | 440 |
424 int InputZipFile::SkipFile(const bool compressed) { | 441 int InputZipFile::SkipFile(const bool compressed) { |
425 if (!compressed) { | 442 if (!compressed) { |
426 // In this case, compressed_size_ == uncompressed_size_ (since the file is | 443 // In this case, compressed_size_ == uncompressed_size_ (since the file is |
427 // uncompressed), so we can use either. | 444 // uncompressed), so we can use either. |
428 if (compressed_size_ != uncompressed_size_) { | 445 if (compressed_size_ != uncompressed_size_) { |
429 return error("compressed size != uncompressed size, although the file " | 446 return error("compressed size != uncompressed size, although the file " |
430 "is uncompressed.\n"); | 447 "is uncompressed.\n"); |
431 } | 448 } |
432 } | 449 } |
433 | 450 |
434 if (EnsureRemaining(compressed_size_, "file_data") < 0) { | 451 if (EnsureRemaining(compressed_size_, "file_data") < 0) { |
435 return -1; | 452 return -1; |
436 } | 453 } |
437 p += compressed_size_; | 454 p += compressed_size_; |
438 return 0; | 455 return 0; |
439 } | 456 } |
440 | 457 |
441 u1* InputZipFile::UncompressFile() { | 458 u1* InputZipFile::UncompressFile() { |
442 size_t in_offset = p - zipdata_in_; | 459 size_t in_offset = p - zipdata_in_; |
443 size_t remaining = in_length_ - in_offset; | 460 size_t remaining = input_file_->Length() - in_offset; |
444 z_stream stream; | 461 z_stream stream; |
445 | 462 |
446 stream.zalloc = Z_NULL; | 463 stream.zalloc = Z_NULL; |
447 stream.zfree = Z_NULL; | 464 stream.zfree = Z_NULL; |
448 stream.opaque = Z_NULL; | 465 stream.opaque = Z_NULL; |
449 stream.avail_in = remaining; | 466 stream.avail_in = remaining; |
450 stream.next_in = (Bytef *) p; | 467 stream.next_in = (Bytef *) p; |
451 | 468 |
452 int ret = inflateInit2(&stream, -MAX_WBITS); | 469 int ret = inflateInit2(&stream, -MAX_WBITS); |
453 if (ret != Z_OK) { | 470 if (ret != Z_OK) { |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
540 // Reads and returns some metadata of the next file from the central directory: | 557 // Reads and returns some metadata of the next file from the central directory: |
541 // - compressed size | 558 // - compressed size |
542 // - uncompressed size | 559 // - uncompressed size |
543 // - whether the entry is a class file (to be included in the output). | 560 // - whether the entry is a class file (to be included in the output). |
544 // Precondition: p points to the beginning of an entry in the central dir | 561 // Precondition: p points to the beginning of an entry in the central dir |
545 // Postcondition: p points to the beginning of the next entry in the central dir | 562 // Postcondition: p points to the beginning of the next entry in the central dir |
546 // Returns true if the central directory contains another file and false if not. | 563 // Returns true if the central directory contains another file and false if not. |
547 // Of course, in the latter case, the size output variables are not changed. | 564 // Of course, in the latter case, the size output variables are not changed. |
548 // Note that the central directory is always followed by another data structure | 565 // Note that the central directory is always followed by another data structure |
549 // that has a signature, so parsing it this way is safe. | 566 // that has a signature, so parsing it this way is safe. |
550 static bool ProcessCentralDirEntry( | 567 bool InputZipFile::ProcessCentralDirEntry(const u1 *&p, size_t *compressed_size, |
551 const u1 *&p, size_t *compressed_size, size_t *uncompressed_size, | 568 size_t *uncompressed_size, |
552 char *filename, size_t filename_size, u4 *attr, u4 *offset) { | 569 char *filename, size_t filename_size, |
| 570 u4 *attr, u4 *offset) { |
553 u4 signature = get_u4le(p); | 571 u4 signature = get_u4le(p); |
| 572 |
554 if (signature != CENTRAL_FILE_HEADER_SIGNATURE) { | 573 if (signature != CENTRAL_FILE_HEADER_SIGNATURE) { |
| 574 if (signature != DIGITAL_SIGNATURE && signature != EOCD_SIGNATURE && |
| 575 signature != ZIP64_EOCD_SIGNATURE) { |
| 576 error("invalid central file header signature: 0x%x\n", signature); |
| 577 } |
555 return false; | 578 return false; |
556 } | 579 } |
557 | 580 |
558 p += 16; // skip to 'compressed size' field | 581 p += 16; // skip to 'compressed size' field |
559 *compressed_size = get_u4le(p); | 582 *compressed_size = get_u4le(p); |
560 *uncompressed_size = get_u4le(p); | 583 *uncompressed_size = get_u4le(p); |
561 u2 file_name_length = get_u2le(p); | 584 u2 file_name_length = get_u2le(p); |
562 u2 extra_field_length = get_u2le(p); | 585 u2 extra_field_length = get_u2le(p); |
563 u2 file_comment_length = get_u2le(p); | 586 u2 file_comment_length = get_u2le(p); |
564 p += 4; // skip to external file attributes field | 587 p += 4; // skip to external file attributes field |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
602 compressed_size += (u8) file_compressed; | 625 compressed_size += (u8) file_compressed; |
603 uncompressed_size += (u8) file_uncompressed; | 626 uncompressed_size += (u8) file_uncompressed; |
604 } else { | 627 } else { |
605 skipped_compressed_size += file_compressed; | 628 skipped_compressed_size += file_compressed; |
606 } | 629 } |
607 } | 630 } |
608 | 631 |
609 // The worst case is when the output is simply the input uncompressed. The | 632 // The worst case is when the output is simply the input uncompressed. The |
610 // metadata in the zip file will stay the same, so the file will grow by the | 633 // metadata in the zip file will stay the same, so the file will grow by the |
611 // difference between the compressed and uncompressed sizes. | 634 // difference between the compressed and uncompressed sizes. |
612 return (u8) in_length_ - skipped_compressed_size | 635 return (u8) input_file_->Length() - skipped_compressed_size |
613 + (uncompressed_size - compressed_size); | 636 + (uncompressed_size - compressed_size); |
614 } | 637 } |
615 | 638 |
| 639 // An end of central directory record, sized for optional zip64 contents. |
| 640 struct EndOfCentralDirectoryRecord { |
| 641 u4 number_of_this_disk; |
| 642 u4 disk_with_central_dir; |
| 643 u8 central_dir_entries_on_this_disk; |
| 644 u8 central_dir_entries; |
| 645 u8 central_dir_size; |
| 646 u8 central_dir_offset; |
| 647 }; |
| 648 |
| 649 // Checks for a zip64 end of central directory record. If a valid zip64 EOCD is |
| 650 // found, updates the original EOCD record and returns true. |
| 651 bool MaybeReadZip64CentralDirectory(const u1 *bytes, size_t in_length, |
| 652 const u1 *current, |
| 653 const u1 **end_of_central_dir, |
| 654 EndOfCentralDirectoryRecord *cd) { |
| 655 if (current < bytes) { |
| 656 return false; |
| 657 } |
| 658 const u1 *candidate = current; |
| 659 u4 zip64_directory_signature = get_u4le(current); |
| 660 if (zip64_directory_signature != ZIP64_EOCD_SIGNATURE) { |
| 661 return false; |
| 662 } |
| 663 |
| 664 // size of zip64 end of central directory record |
| 665 // (fixed size unless there's a zip64 extensible data sector, which |
| 666 // we don't need to read) |
| 667 get_u8le(current); |
| 668 get_u2be(current); // version made by |
| 669 get_u2be(current); // version needed to extract |
| 670 |
| 671 u4 number_of_this_disk = get_u4be(current); |
| 672 u4 disk_with_central_dir = get_u4le(current); |
| 673 u8 central_dir_entries_on_this_disk = get_u8le(current); |
| 674 u8 central_dir_entries = get_u8le(current); |
| 675 u8 central_dir_size = get_u8le(current); |
| 676 u8 central_dir_offset = get_u8le(current); |
| 677 |
| 678 // check for a zip64 EOCD that matches the regular EOCD |
| 679 if (number_of_this_disk != cd->number_of_this_disk && |
| 680 cd->number_of_this_disk != U2_MAX) { |
| 681 return false; |
| 682 } |
| 683 if (disk_with_central_dir != cd->disk_with_central_dir && |
| 684 cd->disk_with_central_dir != U2_MAX) { |
| 685 return false; |
| 686 } |
| 687 if (central_dir_entries_on_this_disk != |
| 688 cd->central_dir_entries_on_this_disk && |
| 689 cd->central_dir_entries_on_this_disk != U2_MAX) { |
| 690 return false; |
| 691 } |
| 692 if (central_dir_entries != cd->central_dir_entries && |
| 693 cd->central_dir_entries != U2_MAX) { |
| 694 return false; |
| 695 } |
| 696 if (central_dir_size != cd->central_dir_size && |
| 697 cd->central_dir_size != U4_MAX) { |
| 698 return false; |
| 699 } |
| 700 if (central_dir_offset != cd->central_dir_offset && |
| 701 cd->central_dir_offset != U4_MAX) { |
| 702 return false; |
| 703 } |
| 704 |
| 705 *end_of_central_dir = candidate; |
| 706 cd->number_of_this_disk = number_of_this_disk; |
| 707 cd->disk_with_central_dir = disk_with_central_dir; |
| 708 cd->central_dir_entries_on_this_disk = central_dir_entries_on_this_disk; |
| 709 cd->central_dir_entries = central_dir_entries; |
| 710 cd->central_dir_size = central_dir_size; |
| 711 cd->central_dir_offset = central_dir_offset; |
| 712 return true; |
| 713 } |
| 714 |
| 715 // Starting from the end of central directory record, attempts to locate a zip64 |
| 716 // end of central directory record. If found, updates the given record and |
| 717 // offset with the zip64 data. Returns false on error. |
| 718 bool FindZip64CentralDirectory(const u1 *bytes, size_t in_length, |
| 719 const u1 **end_of_central_dir, |
| 720 EndOfCentralDirectoryRecord *cd) { |
| 721 // In the absence of a zip64 extensible data sector, the zip64 EOCD is at a |
| 722 // fixed offset from the regular central directory. |
| 723 if (MaybeReadZip64CentralDirectory( |
| 724 bytes, in_length, |
| 725 *end_of_central_dir - ZIP64_EOCD_LOCATOR_SIZE - ZIP64_EOCD_FIXED_SIZE, |
| 726 end_of_central_dir, cd)) { |
| 727 return true; |
| 728 } |
| 729 |
| 730 // If we couldn't find a zip64 EOCD at a fixed offset, either it doesn't exist |
| 731 // or there was a zip64 extensible data sector, so try going through the |
| 732 // locator. This approach doesn't work if data was prepended to the archive |
| 733 // without updating the offset in the locator. |
| 734 const u1 *zip64_locator = *end_of_central_dir - ZIP64_EOCD_LOCATOR_SIZE; |
| 735 if (zip64_locator - ZIP64_EOCD_FIXED_SIZE < bytes) { |
| 736 return true; |
| 737 } |
| 738 u4 zip64_locator_signature = get_u4le(zip64_locator); |
| 739 if (zip64_locator_signature != ZIP64_EOCD_LOCATOR_SIGNATURE) { |
| 740 return true; |
| 741 } |
| 742 u4 disk_with_zip64_central_directory = get_u4le(zip64_locator); |
| 743 u8 zip64_end_of_central_dir_offset = get_u8le(zip64_locator); |
| 744 u4 zip64_total_disks = get_u4le(zip64_locator); |
| 745 if (MaybeReadZip64CentralDirectory(bytes, in_length, |
| 746 bytes + zip64_end_of_central_dir_offset, |
| 747 end_of_central_dir, cd)) { |
| 748 if (disk_with_zip64_central_directory != 0 || zip64_total_disks != 1) { |
| 749 fprintf(stderr, "multi-disk JAR files are not supported\n"); |
| 750 return false; |
| 751 } |
| 752 return true; |
| 753 } |
| 754 return true; |
| 755 } |
| 756 |
616 // Given the data in the zip file, returns the offset of the central directory | 757 // Given the data in the zip file, returns the offset of the central directory |
617 // and the number of files contained in it. | 758 // and the number of files contained in it. |
618 bool FindZipCentralDirectory(const u1* bytes, size_t in_length, | 759 bool FindZipCentralDirectory(const u1 *bytes, size_t in_length, u4 *offset, |
619 u4* offset, const u1** central_dir) { | 760 const u1 **central_dir) { |
620 static const int MAX_COMMENT_LENGTH = 0xffff; | 761 static const int MAX_COMMENT_LENGTH = 0xffff; |
621 static const int CENTRAL_DIR_LOCATOR_SIZE = 22; | 762 static const int CENTRAL_DIR_LOCATOR_SIZE = 22; |
622 // Maximum distance of start of central dir locator from end of file | 763 // Maximum distance of start of central dir locator from end of file |
623 static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE; | 764 static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE; |
624 const u1* last_pos_to_check = in_length < MAX_DELTA | 765 const u1* last_pos_to_check = in_length < MAX_DELTA |
625 ? bytes | 766 ? bytes |
626 : bytes + (in_length - MAX_DELTA); | 767 : bytes + (in_length - MAX_DELTA); |
627 const u1* current; | 768 const u1* current; |
628 bool found = false; | 769 bool found = false; |
629 | 770 |
630 for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE; | 771 for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE; |
631 current >= last_pos_to_check; | 772 current >= last_pos_to_check; |
632 current-- ) { | 773 current-- ) { |
633 const u1* p = current; | 774 const u1* p = current; |
634 if (get_u4le(p) != END_OF_CENTRAL_DIR_SIGNATURE) { | 775 if (get_u4le(p) != EOCD_SIGNATURE) { |
635 continue; | 776 continue; |
636 } | 777 } |
637 | 778 |
638 p += 16; // skip to comment length field | 779 p += 16; // skip to comment length field |
639 u2 comment_length = get_u2le(p); | 780 u2 comment_length = get_u2le(p); |
640 | 781 |
641 // Does the comment go exactly till the end of the file? | 782 // Does the comment go exactly till the end of the file? |
642 if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE | 783 if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE |
643 != bytes + in_length) { | 784 != bytes + in_length) { |
644 continue; | 785 continue; |
645 } | 786 } |
646 | 787 |
647 // Hooray, we found it! | 788 // Hooray, we found it! |
648 found = true; | 789 found = true; |
649 break; | 790 break; |
650 } | 791 } |
651 | 792 |
652 if (!found) { | 793 if (!found) { |
653 fprintf(stderr, "file is invalid or corrupted (missing end of central " | 794 fprintf(stderr, "file is invalid or corrupted (missing end of central " |
654 "directory record)\n"); | 795 "directory record)\n"); |
655 return false; | 796 return false; |
656 } | 797 } |
657 | 798 |
| 799 EndOfCentralDirectoryRecord cd; |
658 const u1* end_of_central_dir = current; | 800 const u1* end_of_central_dir = current; |
659 get_u4le(current); // central directory locator signature, already checked | 801 get_u4le(current); // central directory locator signature, already checked |
660 u2 number_of_this_disk = get_u2le(current); | 802 cd.number_of_this_disk = get_u2le(current); |
661 u2 disk_with_central_dir = get_u2le(current); | 803 cd.disk_with_central_dir = get_u2le(current); |
662 u2 central_dir_entries_on_this_disk = get_u2le(current); | 804 cd.central_dir_entries_on_this_disk = get_u2le(current); |
663 u2 central_dir_entries = get_u2le(current); | 805 cd.central_dir_entries = get_u2le(current); |
664 u4 central_dir_size = get_u4le(current); | 806 cd.central_dir_size = get_u4le(current); |
665 u4 central_dir_offset = get_u4le(current); | 807 cd.central_dir_offset = get_u4le(current); |
666 u2 file_comment_length = get_u2le(current); | 808 u2 file_comment_length = get_u2le(current); |
667 current += file_comment_length; // set current to the end of the central dir | 809 current += file_comment_length; // set current to the end of the central dir |
668 | 810 |
669 if (number_of_this_disk != 0 | 811 if (!FindZip64CentralDirectory(bytes, in_length, &end_of_central_dir, &cd)) { |
670 || disk_with_central_dir != 0 | 812 return false; |
671 || central_dir_entries_on_this_disk != central_dir_entries) { | 813 } |
| 814 |
| 815 if (cd.number_of_this_disk != 0 || cd.disk_with_central_dir != 0 || |
| 816 cd.central_dir_entries_on_this_disk != cd.central_dir_entries) { |
672 fprintf(stderr, "multi-disk JAR files are not supported\n"); | 817 fprintf(stderr, "multi-disk JAR files are not supported\n"); |
673 return false; | 818 return false; |
674 } | 819 } |
675 | 820 |
676 // Do not change output values before determining that they are OK. | 821 // Do not change output values before determining that they are OK. |
677 *offset = central_dir_offset; | 822 *offset = cd.central_dir_offset; |
678 // Central directory start can then be used to determine the actual | 823 // Central directory start can then be used to determine the actual |
679 // starts of the zip file (which can be different in case of a non-zip | 824 // starts of the zip file (which can be different in case of a non-zip |
680 // header like for auto-extractable binaries). | 825 // header like for auto-extractable binaries). |
681 *central_dir = end_of_central_dir - central_dir_size; | 826 *central_dir = end_of_central_dir - cd.central_dir_size; |
682 return true; | 827 return true; |
683 } | 828 } |
684 | 829 |
685 void InputZipFile::Reset() { | 830 void InputZipFile::Reset() { |
686 central_dir_current_ = central_dir_; | 831 central_dir_current_ = central_dir_; |
687 zipdata_in_mapped_ = zipdata_in_; | 832 bytes_unmapped_ = 0; |
688 p = zipdata_in_ + in_offset_; | 833 p = zipdata_in_ + in_offset_; |
689 } | 834 } |
690 | 835 |
691 int ZipExtractor::ProcessAll() { | 836 int ZipExtractor::ProcessAll() { |
692 while (ProcessNext()) {} | 837 while (ProcessNext()) {} |
693 if (GetError() != NULL) { | 838 if (GetError() != NULL) { |
694 return -1; | 839 return -1; |
695 } | 840 } |
696 return 0; | 841 return 0; |
697 } | 842 } |
698 | 843 |
699 ZipExtractor* ZipExtractor::Create(const char* filename, | 844 ZipExtractor* ZipExtractor::Create(const char* filename, |
700 ZipExtractorProcessor *processor) { | 845 ZipExtractorProcessor *processor) { |
701 int fd_in = open(filename, O_RDONLY); | 846 InputZipFile* result = new InputZipFile(processor, filename); |
702 if (fd_in < 0) { | 847 if (!result->Open()) { |
| 848 fprintf(stderr, "%s\n", result->GetError()); |
| 849 delete result; |
703 return NULL; | 850 return NULL; |
704 } | 851 } |
705 | 852 |
706 off_t length = lseek(fd_in, 0, SEEK_END); | 853 return result; |
707 if (length < 0) { | 854 } |
708 return NULL; | 855 |
| 856 // zipdata_in_, in_offset_, p, central_dir_current_ |
| 857 |
| 858 InputZipFile::InputZipFile(ZipExtractorProcessor *processor, |
| 859 const char* filename) |
| 860 : processor(processor), filename_(filename), input_file_(NULL), |
| 861 bytes_unmapped_(0) { |
| 862 uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE; |
| 863 uncompressed_data_ = |
| 864 reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_)); |
| 865 errmsg[0] = 0; |
| 866 } |
| 867 |
| 868 bool InputZipFile::Open() { |
| 869 MappedInputFile* input_file = new MappedInputFile(filename_); |
| 870 if (!input_file->Opened()) { |
| 871 snprintf(errmsg, sizeof(errmsg), "%s", input_file->Error()); |
| 872 delete input_file; |
| 873 return false; |
709 } | 874 } |
710 | 875 |
711 void *zipdata_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0); | 876 void *zipdata_in = input_file->Buffer(); |
712 if (zipdata_in == MAP_FAILED) { | |
713 return NULL; | |
714 } | |
715 | |
716 u4 central_dir_offset; | 877 u4 central_dir_offset; |
717 const u1 *central_dir = NULL; | 878 const u1 *central_dir = NULL; |
718 | 879 |
719 if (!devtools_ijar::FindZipCentralDirectory( | 880 if (!devtools_ijar::FindZipCentralDirectory( |
720 static_cast<const u1*>(zipdata_in), length, | 881 static_cast<const u1*>(zipdata_in), input_file->Length(), |
721 ¢ral_dir_offset, ¢ral_dir)) { | 882 ¢ral_dir_offset, ¢ral_dir)) { |
722 errno = EIO; // we don't really have a good error number | 883 errno = EIO; // we don't really have a good error number |
723 return NULL; | 884 error("Cannot find central directory"); |
| 885 delete input_file; |
| 886 return false; |
724 } | 887 } |
725 const u1 *zipdata_start = static_cast<const u1*>(zipdata_in); | 888 const u1 *zipdata_start = static_cast<const u1*>(zipdata_in); |
726 off_t offset = - static_cast<off_t>(zipdata_start | 889 in_offset_ = - static_cast<off_t>(zipdata_start |
727 + central_dir_offset | 890 + central_dir_offset |
728 - central_dir); | 891 - central_dir); |
729 | 892 |
730 return new InputZipFile(processor, fd_in, length, offset, | 893 input_file_ = input_file; |
731 zipdata_start, central_dir); | 894 zipdata_in_ = zipdata_start; |
732 } | 895 central_dir_ = central_dir; |
733 | 896 central_dir_current_ = central_dir; |
734 InputZipFile::InputZipFile(ZipExtractorProcessor *processor, int fd, | 897 p = zipdata_in_ + in_offset_; |
735 off_t in_length, off_t in_offset, | |
736 const u1* zipdata_in, const u1* central_dir) | |
737 : processor(processor), fd_in(fd), | |
738 zipdata_in_(zipdata_in), zipdata_in_mapped_(zipdata_in), | |
739 central_dir_(central_dir), in_length_(in_length), in_offset_(in_offset), | |
740 p(zipdata_in + in_offset), central_dir_current_(central_dir) { | |
741 uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE; | |
742 uncompressed_data_ = | |
743 reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_)); | |
744 errmsg[0] = 0; | 898 errmsg[0] = 0; |
| 899 return true; |
745 } | 900 } |
746 | 901 |
747 InputZipFile::~InputZipFile() { | 902 InputZipFile::~InputZipFile() { |
748 free(uncompressed_data_); | 903 free(uncompressed_data_); |
749 close(fd_in); | 904 if (input_file_ != NULL) { |
| 905 input_file_->Close(); |
| 906 delete input_file_; |
| 907 } |
750 } | 908 } |
751 | 909 |
752 | 910 |
753 // | 911 // |
754 // Implementation of OutputZipFile | 912 // Implementation of OutputZipFile |
755 // | 913 // |
756 int OutputZipFile::WriteEmptyFile(const char *filename) { | 914 int OutputZipFile::WriteEmptyFile(const char *filename) { |
757 const u1* file_name = (const u1*) filename; | 915 const u1* file_name = (const u1*) filename; |
758 size_t file_name_length = strlen(filename); | 916 size_t file_name_length = strlen(filename); |
759 | 917 |
760 LocalFileEntry *entry = new LocalFileEntry; | 918 LocalFileEntry *entry = new LocalFileEntry; |
761 entry->local_header_offset = Offset(q); | 919 entry->local_header_offset = Offset(q); |
762 entry->external_attr = 0; | 920 entry->external_attr = 0; |
| 921 entry->crc32 = 0; |
763 | 922 |
764 // Output the ZIP local_file_header: | 923 // Output the ZIP local_file_header: |
765 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE); | 924 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE); |
766 put_u2le(q, 10); // extract_version | 925 put_u2le(q, 10); // extract_version |
767 put_u2le(q, 0); // general_purpose_bit_flag | 926 put_u2le(q, 0); // general_purpose_bit_flag |
768 put_u2le(q, 0); // compression_method | 927 put_u2le(q, 0); // compression_method |
769 put_u2le(q, 0); // last_mod_file_time | 928 put_u2le(q, 0); // last_mod_file_time |
770 put_u2le(q, 0); // last_mod_file_date | 929 put_u2le(q, 0); // last_mod_file_date |
771 put_u4le(q, 0); // crc32 | 930 put_u4le(q, entry->crc32); // crc32 |
772 put_u4le(q, 0); // compressed_size | 931 put_u4le(q, 0); // compressed_size |
773 put_u4le(q, 0); // uncompressed_size | 932 put_u4le(q, 0); // uncompressed_size |
774 put_u2le(q, file_name_length); | 933 put_u2le(q, file_name_length); |
775 put_u2le(q, 0); // extra_field_length | 934 put_u2le(q, 0); // extra_field_length |
776 put_n(q, file_name, file_name_length); | 935 put_n(q, file_name, file_name_length); |
777 | 936 |
778 entry->file_name_length = file_name_length; | 937 entry->file_name_length = file_name_length; |
779 entry->extra_field_length = 0; | 938 entry->extra_field_length = 0; |
780 entry->compressed_length = 0; | 939 entry->compressed_length = 0; |
781 entry->uncompressed_length = 0; | 940 entry->uncompressed_length = 0; |
(...skipping 11 matching lines...) Expand all Loading... |
793 for (size_t ii = 0; ii < entries_.size(); ++ii) { | 952 for (size_t ii = 0; ii < entries_.size(); ++ii) { |
794 LocalFileEntry *entry = entries_[ii]; | 953 LocalFileEntry *entry = entries_[ii]; |
795 put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE); | 954 put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE); |
796 put_u2le(q, 0); // version made by | 955 put_u2le(q, 0); // version made by |
797 | 956 |
798 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract | 957 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract |
799 put_u2le(q, 0); // general purpose bit flag | 958 put_u2le(q, 0); // general purpose bit flag |
800 put_u2le(q, entry->compression_method); // compression method: | 959 put_u2le(q, entry->compression_method); // compression method: |
801 put_u2le(q, 0); // last_mod_file_time | 960 put_u2le(q, 0); // last_mod_file_time |
802 put_u2le(q, 0); // last_mod_file_date | 961 put_u2le(q, 0); // last_mod_file_date |
803 put_u4le(q, 0); // crc32 (jar/javac tools don't care) | 962 put_u4le(q, entry->crc32); // crc32 |
804 put_u4le(q, entry->compressed_length); // compressed_size | 963 put_u4le(q, entry->compressed_length); // compressed_size |
805 put_u4le(q, entry->uncompressed_length); // uncompressed_size | 964 put_u4le(q, entry->uncompressed_length); // uncompressed_size |
806 put_u2le(q, entry->file_name_length); | 965 put_u2le(q, entry->file_name_length); |
807 put_u2le(q, entry->extra_field_length); | 966 put_u2le(q, entry->extra_field_length); |
808 | 967 |
809 put_u2le(q, 0); // file comment length | 968 put_u2le(q, 0); // file comment length |
810 put_u2le(q, 0); // disk number start | 969 put_u2le(q, 0); // disk number start |
811 put_u2le(q, 0); // internal file attributes | 970 put_u2le(q, 0); // internal file attributes |
812 put_u4le(q, entry->external_attr); // external file attributes | 971 put_u4le(q, entry->external_attr); // external file attributes |
813 // relative offset of local header: | 972 // relative offset of local header: |
814 put_u4le(q, entry->local_header_offset); | 973 put_u4le(q, entry->local_header_offset); |
815 | 974 |
816 put_n(q, entry->file_name, entry->file_name_length); | 975 put_n(q, entry->file_name, entry->file_name_length); |
817 put_n(q, entry->extra_field, entry->extra_field_length); | 976 put_n(q, entry->extra_field, entry->extra_field_length); |
818 } | 977 } |
819 u4 central_directory_size = q - central_directory_start; | 978 u8 central_directory_size = q - central_directory_start; |
820 | 979 |
821 put_u4le(q, END_OF_CENTRAL_DIR_SIGNATURE); | 980 if (entries_.size() > U2_MAX || central_directory_size > U4_MAX || |
822 put_u2le(q, 0); // number of this disk | 981 Offset(central_directory_start) > U4_MAX) { |
823 put_u2le(q, 0); // number of the disk with the start of the central directory | 982 u1 *zip64_end_of_central_directory_start = q; |
824 put_u2le(q, entries_.size()); // # central dir entries on this disk | 983 |
825 put_u2le(q, entries_.size()); // total # entries in the central directory | 984 put_u4le(q, ZIP64_EOCD_SIGNATURE); |
826 put_u4le(q, central_directory_size); // size of the central directory | 985 // signature and size field doesn't count towards size |
827 put_u4le(q, Offset(central_directory_start)); // offset of start of central | 986 put_u8le(q, ZIP64_EOCD_FIXED_SIZE - 12); |
828 // directory wrt starting disk | 987 put_u2le(q, 0); // version made by |
829 put_u2le(q, 0); // .ZIP file comment length | 988 put_u2le(q, 0); // version needed to extract |
| 989 put_u4le(q, 0); // number of this disk |
| 990 put_u4le(q, 0); // # of the disk with the start of the central directory |
| 991 put_u8le(q, entries_.size()); // # central dir entries on this disk |
| 992 put_u8le(q, entries_.size()); // total # entries in the central directory |
| 993 put_u8le(q, central_directory_size); // size of the central directory |
| 994 // offset of start of central directory wrt starting disk |
| 995 put_u8le(q, Offset(central_directory_start)); |
| 996 |
| 997 put_u4le(q, ZIP64_EOCD_LOCATOR_SIGNATURE); |
| 998 // number of the disk with the start of the zip64 end of central directory |
| 999 put_u4le(q, 0); |
| 1000 // relative offset of the zip64 end of central directory record |
| 1001 put_u8le(q, Offset(zip64_end_of_central_directory_start)); |
| 1002 // total number of disks |
| 1003 put_u4le(q, 1); |
| 1004 |
| 1005 put_u4le(q, EOCD_SIGNATURE); |
| 1006 put_u2le(q, 0); // number of this disk |
| 1007 put_u2le(q, 0); // # of disk with the start of the central directory |
| 1008 // # central dir entries on this disk |
| 1009 put_u2le(q, entries_.size() > 0xffff ? 0xffff : entries_.size()); |
| 1010 // total # entries in the central directory |
| 1011 put_u2le(q, entries_.size() > 0xffff ? 0xffff : entries_.size()); |
| 1012 // size of the central directory |
| 1013 put_u4le(q, |
| 1014 central_directory_size > U4_MAX ? U4_MAX : central_directory_size); |
| 1015 // offset of start of central |
| 1016 put_u4le(q, Offset(central_directory_start) > U4_MAX |
| 1017 ? U4_MAX |
| 1018 : Offset(central_directory_start)); |
| 1019 put_u2le(q, 0); // .ZIP file comment length |
| 1020 |
| 1021 } else { |
| 1022 put_u4le(q, EOCD_SIGNATURE); |
| 1023 put_u2le(q, 0); // number of this disk |
| 1024 put_u2le(q, 0); // # of the disk with the start of the central directory |
| 1025 put_u2le(q, entries_.size()); // # central dir entries on this disk |
| 1026 put_u2le(q, entries_.size()); // total # entries in the central directory |
| 1027 put_u4le(q, central_directory_size); // size of the central directory |
| 1028 // offset of start of central directory wrt starting disk |
| 1029 put_u4le(q, Offset(central_directory_start)); |
| 1030 put_u2le(q, 0); // .ZIP file comment length |
| 1031 } |
830 } | 1032 } |
831 | 1033 |
832 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) { | 1034 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) { |
833 off_t file_name_length_ = strlen(filename); | 1035 off_t file_name_length_ = strlen(filename); |
834 LocalFileEntry *entry = new LocalFileEntry; | 1036 LocalFileEntry *entry = new LocalFileEntry; |
835 entry->local_header_offset = Offset(q); | 1037 entry->local_header_offset = Offset(q); |
836 entry->file_name_length = file_name_length_; | 1038 entry->file_name_length = file_name_length_; |
837 entry->file_name = new u1[file_name_length_]; | 1039 entry->file_name = new u1[file_name_length_]; |
838 entry->external_attr = attr; | 1040 entry->external_attr = attr; |
839 memcpy(entry->file_name, filename, file_name_length_); | 1041 memcpy(entry->file_name, filename, file_name_length_); |
840 entry->extra_field_length = 0; | 1042 entry->extra_field_length = 0; |
841 entry->extra_field = (const u1 *)""; | 1043 entry->extra_field = (const u1 *)""; |
842 | 1044 |
843 // Output the ZIP local_file_header: | 1045 // Output the ZIP local_file_header: |
844 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE); | 1046 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE); |
845 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract | 1047 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract |
846 put_u2le(q, 0); // general purpose bit flag | 1048 put_u2le(q, 0); // general purpose bit flag |
847 u1 *header_ptr = q; | 1049 u1 *header_ptr = q; |
848 put_u2le(q, COMPRESSION_METHOD_STORED); // compression method = placeholder | 1050 put_u2le(q, COMPRESSION_METHOD_STORED); // compression method = placeholder |
849 put_u2le(q, 0); // last_mod_file_time | 1051 put_u2le(q, 0); // last_mod_file_time |
850 put_u2le(q, 0); // last_mod_file_date | 1052 put_u2le(q, 0); // last_mod_file_date |
851 put_u4le(q, 0); // crc32 (jar/javac tools don't care) | 1053 put_u4le(q, entry->crc32); // crc32 |
852 put_u4le(q, 0); // compressed_size = placeholder | 1054 put_u4le(q, 0); // compressed_size = placeholder |
853 put_u4le(q, 0); // uncompressed_size = placeholder | 1055 put_u4le(q, 0); // uncompressed_size = placeholder |
854 put_u2le(q, entry->file_name_length); | 1056 put_u2le(q, entry->file_name_length); |
855 put_u2le(q, entry->extra_field_length); | 1057 put_u2le(q, entry->extra_field_length); |
856 | 1058 |
857 put_n(q, entry->file_name, entry->file_name_length); | 1059 put_n(q, entry->file_name, entry->file_name_length); |
858 put_n(q, entry->extra_field, entry->extra_field_length); | 1060 put_n(q, entry->extra_field, entry->extra_field_length); |
859 entries_.push_back(entry); | 1061 entries_.push_back(entry); |
860 | 1062 |
861 return header_ptr; | 1063 return header_ptr; |
(...skipping 11 matching lines...) Expand all Loading... |
873 stream.zalloc = Z_NULL; | 1075 stream.zalloc = Z_NULL; |
874 stream.zfree = Z_NULL; | 1076 stream.zfree = Z_NULL; |
875 stream.opaque = Z_NULL; | 1077 stream.opaque = Z_NULL; |
876 stream.total_in = length; | 1078 stream.total_in = length; |
877 stream.avail_in = length; | 1079 stream.avail_in = length; |
878 stream.total_out = length; | 1080 stream.total_out = length; |
879 stream.avail_out = length; | 1081 stream.avail_out = length; |
880 stream.next_in = buf; | 1082 stream.next_in = buf; |
881 stream.next_out = outbuf; | 1083 stream.next_out = outbuf; |
882 | 1084 |
883 if (deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK) { | 1085 // deflateInit2 negative windows size prevent the zlib wrapper to be used. |
| 1086 if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, |
| 1087 -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) { |
884 // Failure to compress => return the buffer uncompressed | 1088 // Failure to compress => return the buffer uncompressed |
885 free(outbuf); | 1089 free(outbuf); |
886 return length; | 1090 return length; |
887 } | 1091 } |
888 | 1092 |
889 if (deflate(&stream, Z_FINISH) == Z_STREAM_END) { | 1093 if (deflate(&stream, Z_FINISH) == Z_STREAM_END) { |
890 // Compression successful and fits in outbuf, let's copy the result in buf. | 1094 // Compression successful and fits in outbuf, let's copy the result in buf. |
891 length = stream.total_out; | 1095 length = stream.total_out; |
892 memcpy(buf, outbuf, length); | 1096 memcpy(buf, outbuf, length); |
893 } | 1097 } |
894 | 1098 |
895 deflateEnd(&stream); | 1099 deflateEnd(&stream); |
896 free(outbuf); | 1100 free(outbuf); |
897 | 1101 |
898 // Return the length of the resulting buffer | 1102 // Return the length of the resulting buffer |
899 return length; | 1103 return length; |
900 } | 1104 } |
901 | 1105 |
902 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr, | 1106 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr, |
903 size_t out_length, | 1107 size_t out_length, |
904 bool compress) { | 1108 bool compress, |
| 1109 const u4 crc) { |
905 size_t compressed_size = out_length; | 1110 size_t compressed_size = out_length; |
906 if (compress) { | 1111 if (compress) { |
907 compressed_size = TryDeflate(q, out_length); | 1112 compressed_size = TryDeflate(q, out_length); |
908 } | 1113 } |
909 // compression method | 1114 // compression method |
910 if (compressed_size < out_length) { | 1115 if (compressed_size < out_length) { |
911 put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED); | 1116 put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED); |
912 } else { | 1117 } else { |
913 put_u2le(header_ptr, COMPRESSION_METHOD_STORED); | 1118 put_u2le(header_ptr, COMPRESSION_METHOD_STORED); |
914 } | 1119 } |
915 header_ptr += 8; | 1120 header_ptr += 4; |
| 1121 put_u4le(header_ptr, crc); // crc32 |
916 put_u4le(header_ptr, compressed_size); // compressed_size | 1122 put_u4le(header_ptr, compressed_size); // compressed_size |
917 put_u4le(header_ptr, out_length); // uncompressed_size | 1123 put_u4le(header_ptr, out_length); // uncompressed_size |
918 return compressed_size; | 1124 return compressed_size; |
919 } | 1125 } |
920 | 1126 |
921 int OutputZipFile::Finish() { | 1127 int OutputZipFile::Finish() { |
922 if (fd_out > 0) { | 1128 if (finished_) { |
923 WriteCentralDirectory(); | 1129 return 0; |
924 if (ftruncate(fd_out, GetSize()) < 0) { | |
925 return error("ftruncate(fd_out, GetSize()): %s", strerror(errno)); | |
926 } | |
927 if (close(fd_out) < 0) { | |
928 return error("close(fd_out): %s", strerror(errno)); | |
929 } | |
930 fd_out = -1; | |
931 } | 1130 } |
| 1131 |
| 1132 finished_ = true; |
| 1133 WriteCentralDirectory(); |
| 1134 if (output_file_->Close(GetSize()) < 0) { |
| 1135 return error("%s", output_file_->Error()); |
| 1136 } |
| 1137 delete output_file_; |
| 1138 output_file_ = NULL; |
932 return 0; | 1139 return 0; |
933 } | 1140 } |
934 | 1141 |
935 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) { | 1142 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) { |
936 header_ptr = WriteLocalFileHeader(filename, attr); | 1143 header_ptr = WriteLocalFileHeader(filename, attr); |
937 return q; | 1144 return q; |
938 } | 1145 } |
939 | 1146 |
940 int OutputZipFile::FinishFile(size_t filelength, bool compress) { | 1147 int OutputZipFile::FinishFile(size_t filelength, bool compress, |
| 1148 bool compute_crc) { |
| 1149 u4 crc = 0; |
| 1150 if (compute_crc) { |
| 1151 crc = crc32(crc, q, filelength); |
| 1152 } |
941 size_t compressed_size = | 1153 size_t compressed_size = |
942 WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress); | 1154 WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress, crc); |
| 1155 entries_.back()->crc32 = crc; |
943 entries_.back()->compressed_length = compressed_size; | 1156 entries_.back()->compressed_length = compressed_size; |
944 entries_.back()->uncompressed_length = filelength; | 1157 entries_.back()->uncompressed_length = filelength; |
945 if (compressed_size < filelength) { | 1158 if (compressed_size < filelength) { |
946 entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED; | 1159 entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED; |
947 } else { | 1160 } else { |
948 entries_.back()->compression_method = COMPRESSION_METHOD_STORED; | 1161 entries_.back()->compression_method = COMPRESSION_METHOD_STORED; |
949 } | 1162 } |
950 q += compressed_size; | 1163 q += compressed_size; |
951 return 0; | 1164 return 0; |
952 } | 1165 } |
953 | 1166 |
954 ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) { | 1167 bool OutputZipFile::Open() { |
955 if (estimated_size > kMaximumOutputSize) { | 1168 if (estimated_size_ > kMaximumOutputSize) { |
956 fprintf(stderr, | 1169 fprintf(stderr, |
957 "Uncompressed input jar has size %llu, " | 1170 "Uncompressed input jar has size %llu, " |
958 "which exceeds the maximum supported output size %llu.\n" | 1171 "which exceeds the maximum supported output size %llu.\n" |
959 "Assuming that ijar will be smaller and hoping for the best.\n", | 1172 "Assuming that ijar will be smaller and hoping for the best.\n", |
960 estimated_size, kMaximumOutputSize); | 1173 estimated_size_, kMaximumOutputSize); |
961 estimated_size = kMaximumOutputSize; | 1174 estimated_size_ = kMaximumOutputSize; |
962 } | 1175 } |
963 | 1176 |
964 int fd_out = open(zip_file, O_CREAT|O_RDWR|O_TRUNC, 0644); | 1177 MappedOutputFile* output_file = new MappedOutputFile( |
965 if (fd_out < 0) { | 1178 filename_, estimated_size_); |
| 1179 if (!output_file->Opened()) { |
| 1180 snprintf(errmsg, sizeof(errmsg), "%s", output_file->Error()); |
| 1181 delete output_file; |
| 1182 return false; |
| 1183 } |
| 1184 |
| 1185 output_file_ = output_file; |
| 1186 q = output_file->Buffer(); |
| 1187 zipdata_out_ = output_file->Buffer(); |
| 1188 return true; |
| 1189 } |
| 1190 |
| 1191 ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) { |
| 1192 OutputZipFile* result = new OutputZipFile(zip_file, estimated_size); |
| 1193 if (!result->Open()) { |
| 1194 fprintf(stderr, "%s\n", result->GetError()); |
| 1195 delete result; |
966 return NULL; | 1196 return NULL; |
967 } | 1197 } |
968 | 1198 |
969 // Create mmap-able sparse file | 1199 return result; |
970 if (ftruncate(fd_out, estimated_size) < 0) { | |
971 return NULL; | |
972 } | |
973 | |
974 // Ensure that any buffer overflow in JarStripper will result in | |
975 // SIGSEGV or SIGBUS by over-allocating beyond the end of the file. | |
976 size_t mmap_length = std::min(estimated_size + sysconf(_SC_PAGESIZE), | |
977 (u8) std::numeric_limits<size_t>::max()); | |
978 | |
979 void *zipdata_out = mmap(NULL, mmap_length, PROT_WRITE, | |
980 MAP_SHARED, fd_out, 0); | |
981 if (zipdata_out == MAP_FAILED) { | |
982 fprintf(stderr, "output_length=%llu\n", estimated_size); | |
983 return NULL; | |
984 } | |
985 | |
986 return new OutputZipFile(fd_out, (u1*) zipdata_out); | |
987 } | 1200 } |
988 | 1201 |
989 u8 ZipBuilder::EstimateSize(char **files) { | 1202 u8 ZipBuilder::EstimateSize(char **files) { |
990 struct stat statst; | 1203 struct stat statst; |
991 // Digital signature field size = 6, End of central directory = 22, Total = 28 | 1204 // Digital signature field size = 6, End of central directory = 22, Total = 28 |
992 u8 size = 28; | 1205 u8 size = 28; |
993 // Count the size of all the files in the input to estimate the size of the | 1206 // Count the size of all the files in the input to estimate the size of the |
994 // output. | 1207 // output. |
995 for (int i = 0; files[i] != NULL; i++) { | 1208 for (int i = 0; files[i] != NULL; i++) { |
996 if (stat(files[i], &statst) != 0) { | 1209 if (stat(files[i], &statst) != 0) { |
997 fprintf(stderr, "File %s does not seem to exist.", files[i]); | 1210 fprintf(stderr, "File %s does not seem to exist.", files[i]); |
998 return 0; | 1211 return 0; |
999 } | 1212 } |
1000 size += statst.st_size; | 1213 size += statst.st_size; |
1001 // Add sizes of Zip meta data | 1214 // Add sizes of Zip meta data |
1002 // local file header = 30 bytes | 1215 // local file header = 30 bytes |
1003 // data descriptor = 12 bytes | 1216 // data descriptor = 12 bytes |
1004 // central directory descriptor = 46 bytes | 1217 // central directory descriptor = 46 bytes |
1005 // Total: 88bytes | 1218 // Total: 88bytes |
1006 size += 88; | 1219 size += 88; |
1007 // The filename is stored twice (once in the central directory | 1220 // The filename is stored twice (once in the central directory |
1008 // and once in the local file header). | 1221 // and once in the local file header). |
1009 size += strlen(files[i]) * 2; | 1222 size += strlen(files[i]) * 2; |
1010 } | 1223 } |
1011 return size; | 1224 return size; |
1012 } | 1225 } |
1013 | 1226 |
1014 } // namespace devtools_ijar | 1227 } // namespace devtools_ijar |
OLD | NEW |