001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 021import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 022import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 023import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 024import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 025 026import java.io.BufferedInputStream; 027import java.io.ByteArrayInputStream; 028import java.io.Closeable; 029import java.io.EOFException; 030import java.io.File; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.SequenceInputStream; 034import java.nio.Buffer; 035import java.nio.ByteBuffer; 036import java.nio.channels.FileChannel; 037import java.nio.channels.SeekableByteChannel; 038import java.nio.file.Files; 039import java.nio.file.Path; 040import java.nio.file.StandardOpenOption; 041import java.util.Arrays; 042import java.util.Collections; 043import java.util.Comparator; 044import java.util.EnumSet; 045import java.util.Enumeration; 
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.zip.Inflater;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.EntryStreamOffsets;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.CountingInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;

/**
 * Replacement for {@code java.util.zip.ZipFile}.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
 * instead of {@code java.util.zip.ZipEntry}.</p>
 *
 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would
 * have to reimplement all methods anyway.  Like
 * {@code java.util.zip.ZipFile}, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * {@code java.util.zip.ZipFile}, with a couple of exceptions:</p>
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
 *   instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
public class ZipFile implements Closeable {
    // initial capacity handed to the HashMap backing nameMap
    private static final int HASH_SIZE = 509;
    // used together as (versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK to extract the
    // platform from the "version made by" field of a central directory entry
    static final int NIBLET_MASK = 0x0f;
    static final int BYTE_SHIFT = 8;
    // NOTE(review): POS_0..POS_3 look like byte indices; their use is outside this section - confirm
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
    // a single zero byte; getInputStream appends it after DEFLATED data for the nowrap Inflater
    private static final byte[] ONE_ZERO_BYTE = new byte[1];

    /**
     * List of entries in the order they appear inside the central
     * directory.
     */
    private final List<ZipArchiveEntry> entries =
        new LinkedList<>();

    /**
     * Maps an entry name to the list of all entries carrying that name.
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
        new HashMap<>(HASH_SIZE);

    /**
     * The encoding to use for file names and the file comment.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * Defaults to UTF-8.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for file names and the file comment.
     */
    private final ZipEncoding zipEncoding;

    /**
     * File name of actual source.
*/
    private final String archiveName;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     *
     * <p>Starts out as {@code true}; the private constructor only flips it
     * to {@code false} once the archive has been read successfully, and
     * {@link #close()} sets it back to {@code true}. {@link #finalize()}
     * consults it to decide whether a clean-up is still required.</p>
     */
    private volatile boolean closed = true;

    /**
     * Whether the zip archive is a split zip archive
     */
    private final boolean isSplitZipArchive;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] dwordBuf = new byte[DWORD];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] cfhBuf = new byte[CFH_LEN];
    private final byte[] shortBuf = new byte[SHORT];
    // ByteBuffer views over the arrays above, so channel reads land directly in them
    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);

    // location of the central directory; sanityCheckLFHOffset compares local file
    // header positions against these values
    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
    private long centralDirectoryStartOffset;

    /**
     * Opens the given file for reading, assuming "UTF8" for file names.
     *
     * @param f the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f) throws IOException {
        this(f, ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given path for reading, assuming "UTF8" for file names.
     * @param path path to the archive.
     * @throws IOException if an error occurs while reading the file.
     * @since 1.22
     */
    public ZipFile(final Path path) throws IOException {
        this(path, ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given file for reading, assuming "UTF8".
     *
     * @param name name of the archive.
*
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name) throws IOException {
        this(new File(name).toPath(), ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names, scanning unicode extra fields.
     *
     * @param name name of the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name, final String encoding) throws IOException {
        // "true": Unicode extra fields are always honoured by this overload
        this(new File(name).toPath(), encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding) throws IOException {
        // "true": Unicode extra fields are always honoured by this overload
        this(f.toPath(), encoding, true);
    }

    /**
     * Opens the given path for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     * @param path path to the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @throws IOException if an error occurs while reading the file.
     * @since 1.22
     */
    public ZipFile(final Path path, final String encoding) throws IOException {
        // "true": Unicode extra fields are always honoured by this overload
        this(path, encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     * @param f the archive.
* @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // "false": local file headers are not ignored by this overload
        this(f.toPath(), encoding, useUnicodeExtraFields, false);
    }

    /**
     * Opens the given path for reading, assuming the specified
     * encoding for file names.
     * @param path path to the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @throws IOException if an error occurs while reading the file.
     * @since 1.22
     */
    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // "false": local file headers are not ignored by this overload
        this(path, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     *
     * <p>By default the central directory record and all local file headers of the archive will be read immediately
     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
     * may contain information not present inside of the central directory which will not be available when the argument
     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
Also
     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
     * true}.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param ignoreLocalFileHeader whether to ignore information
     * stored inside the local file header (see the notes in this method's javadoc)
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.19
     */
    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields,
                   final boolean ignoreLocalFileHeader)
        throws IOException {
        // the channel is opened right here, so the private constructor is asked to
        // close it again if reading the archive fails (closeOnError = true)
        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
    }

    /**
     * Opens the given path for reading, assuming the specified
     * encoding for file names.
     * <p>By default the central directory record and all local file headers of the archive will be read immediately
     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
     * may contain information not present inside of the central directory which will not be available when the argument
     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
     * true}.</p>
     * @param path path to the archive.
* @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param ignoreLocalFileHeader whether to ignore information
     * stored inside the local file header (see the notes in this method's javadoc)
     * @throws IOException if an error occurs while reading the file.
     * @since 1.22
     */
    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields,
                   final boolean ignoreLocalFileHeader)
        throws IOException {
        // the channel is opened right here, so the private constructor is asked to
        // close it again if reading the archive fails (closeOnError = true)
        this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)),
             path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields,
             true, ignoreLocalFileHeader);
    }

    /**
     * Opens the given channel for reading, assuming "UTF8" for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>Error messages refer to the archive as "unknown archive" and
     * Unicode extra fields are used.</p>
     *
     * @param channel the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel)
        throws IOException {
        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
* @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String encoding)
        throws IOException {
        // archive name is only used in error messages; Unicode extra fields are used
        this(channel, "unknown archive", encoding, true);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the archive.
     * @param archiveName name of the archive, used for error messages only.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String archiveName,
                   final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // closeOnError = false: the caller supplied the channel and keeps ownership of it
        // if construction fails; ignoreLocalFileHeader = false: local headers are read
        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>By default the central directory record and all local file headers of the archive will be read immediately
     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
     * may contain information not present inside of the central directory which will not be available when the argument
     * is set to {@code true}.
This includes the content of the Unicode extra field, so setting {@code 409 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also 410 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 411 * true}.</p> 412 * 413 * @param channel the archive. 414 * @param archiveName name of the archive, used for error messages only. 415 * @param encoding the encoding to use for file names, use null 416 * for the platform's default encoding 417 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 418 * Extra Fields (if present) to set the file names. 419 * @param ignoreLocalFileHeader whether to ignore information 420 * stored inside the local file header (see the notes in this method's javadoc) 421 * 422 * @throws IOException if an error occurs while reading the file. 423 * @since 1.19 424 */ 425 public ZipFile(final SeekableByteChannel channel, final String archiveName, 426 final String encoding, final boolean useUnicodeExtraFields, 427 final boolean ignoreLocalFileHeader) 428 throws IOException { 429 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 430 } 431 432 private ZipFile(final SeekableByteChannel channel, final String archiveName, 433 final String encoding, final boolean useUnicodeExtraFields, 434 final boolean closeOnError, final boolean ignoreLocalFileHeader) 435 throws IOException { 436 isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); 437 438 this.archiveName = archiveName; 439 this.encoding = encoding; 440 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 441 this.useUnicodeExtraFields = useUnicodeExtraFields; 442 archive = channel; 443 boolean success = false; 444 try { 445 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 446 populateFromCentralDirectory(); 447 if (!ignoreLocalFileHeader) { 448 
resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 449 } 450 fillNameMap(); 451 success = true; 452 } catch (final IOException e) { 453 throw new IOException("Error on ZipFile " + archiveName, e); 454 } finally { 455 closed = !success; 456 if (!success && closeOnError) { 457 IOUtils.closeQuietly(archive); 458 } 459 } 460 } 461 462 /** 463 * The encoding to use for file names and the file comment. 464 * 465 * @return null if using the platform's default character encoding. 466 */ 467 public String getEncoding() { 468 return encoding; 469 } 470 471 /** 472 * Closes the archive. 473 * @throws IOException if an error occurs closing the archive. 474 */ 475 @Override 476 public void close() throws IOException { 477 // this flag is only written here and read in finalize() which 478 // can never be run in parallel. 479 // no synchronization needed. 480 closed = true; 481 482 archive.close(); 483 } 484 485 /** 486 * close a zipfile quietly; throw no io fault, do nothing 487 * on a null parameter 488 * @param zipfile file to close, can be null 489 */ 490 public static void closeQuietly(final ZipFile zipfile) { 491 IOUtils.closeQuietly(zipfile); 492 } 493 494 /** 495 * Returns all entries. 496 * 497 * <p>Entries will be returned in the same order they appear 498 * within the archive's central directory.</p> 499 * 500 * @return all entries as {@link ZipArchiveEntry} instances 501 */ 502 public Enumeration<ZipArchiveEntry> getEntries() { 503 return Collections.enumeration(entries); 504 } 505 506 /** 507 * Returns all entries in physical order. 
508 * 509 * <p>Entries will be returned in the same order their contents 510 * appear within the archive.</p> 511 * 512 * @return all entries as {@link ZipArchiveEntry} instances 513 * 514 * @since 1.1 515 */ 516 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 517 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY); 518 Arrays.sort(allEntries, offsetComparator); 519 return Collections.enumeration(Arrays.asList(allEntries)); 520 } 521 522 /** 523 * Returns a named entry - or {@code null} if no entry by 524 * that name exists. 525 * 526 * <p>If multiple entries with the same name exist the first entry 527 * in the archive's central directory by that name is 528 * returned.</p> 529 * 530 * @param name name of the entry. 531 * @return the ZipArchiveEntry corresponding to the given name - or 532 * {@code null} if not present. 533 */ 534 public ZipArchiveEntry getEntry(final String name) { 535 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 536 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 537 } 538 539 /** 540 * Returns all named entries in the same order they appear within 541 * the archive's central directory. 542 * 543 * @param name name of the entry. 544 * @return the Iterable<ZipArchiveEntry> corresponding to the 545 * given name 546 * @since 1.6 547 */ 548 public Iterable<ZipArchiveEntry> getEntries(final String name) { 549 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 550 return entriesOfThatName != null ? entriesOfThatName 551 : Collections.emptyList(); 552 } 553 554 /** 555 * Returns all named entries in the same order their contents 556 * appear within the archive. 557 * 558 * @param name name of the entry. 
559 * @return the Iterable<ZipArchiveEntry> corresponding to the 560 * given name 561 * @since 1.6 562 */ 563 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 564 ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY; 565 if (nameMap.containsKey(name)) { 566 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 567 Arrays.sort(entriesOfThatName, offsetComparator); 568 } 569 return Arrays.asList(entriesOfThatName); 570 } 571 572 /** 573 * Whether this class is able to read the given entry. 574 * 575 * <p>May return false if it is set up to use encryption or a 576 * compression method that hasn't been implemented yet.</p> 577 * @since 1.1 578 * @param ze the entry 579 * @return whether this class is able to read the given entry. 580 */ 581 public boolean canReadEntryData(final ZipArchiveEntry ze) { 582 return ZipUtil.canHandleEntryData(ze); 583 } 584 585 /** 586 * Expose the raw stream of the archive entry (compressed form). 587 * 588 * <p>This method does not relate to how/if we understand the payload in the 589 * stream, since we really only intend to move it on to somewhere else.</p> 590 * 591 * @param ze The entry to get the stream for 592 * @return The raw input stream containing (possibly) compressed data. 593 * @since 1.11 594 */ 595 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 596 if (!(ze instanceof Entry)) { 597 return null; 598 } 599 final long start = ze.getDataOffset(); 600 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 601 return null; 602 } 603 return createBoundedInputStream(start, ze.getCompressedSize()); 604 } 605 606 607 /** 608 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 609 * Compression and all other attributes will be as in this file. 
610 * <p>This method transfers entries based on the central directory of the zip file.</p> 611 * 612 * @param target The zipArchiveOutputStream to write the entries to 613 * @param predicate A predicate that selects which entries to write 614 * @throws IOException on error 615 */ 616 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 617 throws IOException { 618 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 619 while (src.hasMoreElements()) { 620 final ZipArchiveEntry entry = src.nextElement(); 621 if (predicate.test( entry)) { 622 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 623 } 624 } 625 } 626 627 /** 628 * Returns an InputStream for reading the contents of the given entry. 629 * 630 * @param ze the entry to get the stream for. 631 * @return a stream to read the entry from. The returned stream 632 * implements {@link InputStreamStatistics}. 633 * @throws IOException if unable to create an input stream from the zipentry 634 */ 635 public InputStream getInputStream(final ZipArchiveEntry ze) 636 throws IOException { 637 if (!(ze instanceof Entry)) { 638 return null; 639 } 640 // cast validity is checked just above 641 ZipUtil.checkRequestedFeatures(ze); 642 final long start = getDataOffset(ze); 643 644 // doesn't get closed if the method is not supported - which 645 // should never happen because of the checkRequestedFeatures 646 // call above 647 final InputStream is = 648 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 649 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 650 case STORED: 651 return new StoredStatisticsStream(is); 652 case UNSHRINKING: 653 return new UnshrinkingInputStream(is); 654 case IMPLODING: 655 try { 656 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 657 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 658 } catch (final IllegalArgumentException ex) 
{ 659 throw new IOException("bad IMPLODE data", ex); 660 } 661 case DEFLATED: 662 final Inflater inflater = new Inflater(true); 663 // Inflater with nowrap=true has this odd contract for a zero padding 664 // byte following the data stream; this used to be zlib's requirement 665 // and has been fixed a long time ago, but the contract persists so 666 // we comply. 667 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 668 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 669 inflater) { 670 @Override 671 public void close() throws IOException { 672 try { 673 super.close(); 674 } finally { 675 inflater.end(); 676 } 677 } 678 }; 679 case BZIP2: 680 return new BZip2CompressorInputStream(is); 681 case ENHANCED_DEFLATED: 682 return new Deflate64CompressorInputStream(is); 683 case AES_ENCRYPTED: 684 case EXPANDING_LEVEL_1: 685 case EXPANDING_LEVEL_2: 686 case EXPANDING_LEVEL_3: 687 case EXPANDING_LEVEL_4: 688 case JPEG: 689 case LZMA: 690 case PKWARE_IMPLODING: 691 case PPMD: 692 case TOKENIZATION: 693 case UNKNOWN: 694 case WAVPACK: 695 case XZ: 696 default: 697 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze); 698 } 699 } 700 701 /** 702 * <p> 703 * Convenience method to return the entry's content as a String if isUnixSymlink() 704 * returns true for it, otherwise returns null. 
705 * </p> 706 * 707 * <p>This method assumes the symbolic link's file name uses the 708 * same encoding that as been specified for this ZipFile.</p> 709 * 710 * @param entry ZipArchiveEntry object that represents the symbolic link 711 * @return entry's content as a String 712 * @throws IOException problem with content's input stream 713 * @since 1.5 714 */ 715 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 716 if (entry != null && entry.isUnixSymlink()) { 717 try (InputStream in = getInputStream(entry)) { 718 return zipEncoding.decode(IOUtils.toByteArray(in)); 719 } 720 } 721 return null; 722 } 723 724 /** 725 * Ensures that the close method of this zipfile is called when 726 * there are no more references to it. 727 * @see #close() 728 */ 729 @Override 730 protected void finalize() throws Throwable { 731 try { 732 if (!closed) { 733 System.err.println("Cleaning up unclosed ZipFile for archive " 734 + archiveName); 735 close(); 736 } 737 } finally { 738 super.finalize(); 739 } 740 } 741 742 /** 743 * Length of a "central directory" entry structure without file 744 * name, extra fields or comment. 
*
     * <p>The four byte signature that precedes the structure is not part
     * of this sum; {@code populateFromCentralDirectory} reads it separately
     * before each entry.</p>
     */
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* file name length                */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Numeric value of the central file header signature, as defined by
     * {@code ZipArchiveOutputStream.CFH_SIG}.
     */
    private static final long CFH_SIG =
        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);

    /**
     * Reads the central directory of the given archive and populates
     * the internal tables with ZipArchiveEntry instances.
     *
     * <p>The ZipArchiveEntrys will know all data that can be obtained from
     * the central directory alone, but not the data that requires the
     * local file header or additional data to be read.</p>
     *
     * @return a map of zipentries that didn't have the language
     * encoding flag set when read.
* @throws IOException if the central directory cannot be read or the
     * archive starts with a local file header but has no central directory entry.
     */
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
            new HashMap<>();

        positionAtCentralDirectory();
        // remembered so sanityCheckLFHOffset can reject local headers located behind it
        centralDirectoryStartOffset = archive.position();

        // NOTE(review): the cast to Buffer presumably keeps the rewind() call
        // compatible with Java 8 runtimes - confirm
        ((Buffer)wordBbuf).rewind();
        IOUtils.readFully(archive, wordBbuf);
        long sig = ZipLong.getValue(wordBuf);

        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
            throw new IOException("Central directory is empty, can't expand"
                                  + " corrupt archive.");
        }

        // each entry is preceded by the CFH signature; the first word that is
        // something else ends the directory
        while (sig == CFH_SIG) {
            readCentralDirectoryEntry(noUTF8Flag);
            ((Buffer)wordBbuf).rewind();
            IOUtils.readFully(archive, wordBbuf);
            sig = ZipLong.getValue(wordBuf);
        }
        return noUTF8Flag;
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the global maps.
     *
     * <p>The channel must be positioned right behind the entry's
     * signature. The fixed-size part ({@code CFH_LEN} bytes) is read into
     * {@code cfhBuf} and decoded field by field, followed by the variable
     * length file name, extra data and comment.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     * @throws IOException if the entry is truncated, contains negative
     * lengths or sizes, carries invalid extra data, or fails the local
     * file header offset sanity check.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        ((Buffer)cfhBbuf).rewind();
        IOUtils.readFully(archive, cfhBbuf);
        // running offset into cfhBuf; advanced by the width of each field consumed
        int off = 0;
        final Entry ze = new Entry();

        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // the platform is taken from the low nibble of the second byte
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += SHORT; // version required

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // the entry's own UTF-8 flag overrides the encoding configured for the archive
        final ZipEncoding entryEncoding =
            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        if (hasUTF8Flag) {
            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }
        ze.setGeneralPurposeBit(gpFlag);
        // same two bytes as parsed above, kept in raw form as well
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += SHORT;

        //noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        // last mod time and date are two adjacent SHORTs, consumed together as one WORD
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        long size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        ze.setCompressedSize(size);
        off += WORD;

        size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        ze.setSize(size);
        off += WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        // the variable length parts follow the fixed structure in the channel:
        // file name, then extra data, then comment
        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
        if (fileName.length < fileNameLen) {
            throw new EOFException();
        }
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
        // data offset will be filled later
        entries.add(ze);

        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            ze.setCentralDirectoryExtra(cdExtraData);
        } catch (RuntimeException ex) {
            // surface parsing failures as a ZipException that names the entry
            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
            z.initCause(ex);
            throw z;
        }

        // must run after the extra data has been set, as the Zip64 field is looked up there
        setSizesAndOffsetFromZip64Extra(ze);
        sanityCheckLFHOffset(ze);

        final byte[] comment = IOUtils.readRange(archive, commentLen);
        if (comment.length < commentLen) {
            throw new EOFException();
        }
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw bytes so name and comment can be re-evaluated later
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }

        ze.setStreamContiguous(true);
    }

    /**
     * Verifies that the position recorded for the entry's local file
     * header is plausible: neither disk number nor offset may be negative
     * and the header must not be located behind the start of the central
     * directory. For split archives the comparison uses disk number and
     * relative offset, otherwise the absolute offset.
     *
     * @param ze the entry to check
     * @throws IOException if any of the checks fails
     */
    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
        if (ze.getDiskNumberStart() < 0) {
            throw new IOException("broken archive, entry with negative disk number");
        }
        if (ze.getLocalHeaderOffset() < 0) {
            throw new IOException("broken archive, entry with negative local file header offset");
        }
        if (isSplitZipArchive) {
            if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
            }
            // same disk as the central directory: compare the offsets within that disk
            if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber
                && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
            }
        } else if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) {
            throw new IOException("local file header for " + ze.getName() + " starts after central directory");
        }
    }

    /**
     * If the entry holds a Zip64 extended
information extra field, 954 * read sizes from there if the entry's sizes are set to 955 * 0xFFFFFFFFF, do the same for the offset of the local file 956 * header. 957 * 958 * <p>Ensures the Zip64 extra either knows both compressed and 959 * uncompressed size or neither of both as the internal logic in 960 * ExtraFieldUtils forces the field to create local header data 961 * even if they are never used - and here a field with only one 962 * size would be invalid.</p> 963 */ 964 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) 965 throws IOException { 966 final ZipExtraField extra = 967 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 968 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 969 throw new ZipException("archive contains unparseable zip64 extra field"); 970 } 971 final Zip64ExtendedInformationExtraField z64 = 972 (Zip64ExtendedInformationExtraField) extra; 973 if (z64 != null) { 974 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 975 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 976 final boolean hasRelativeHeaderOffset = 977 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 978 final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT; 979 z64.reparseCentralDirectoryData(hasUncompressedSize, 980 hasCompressedSize, 981 hasRelativeHeaderOffset, 982 hasDiskStart); 983 984 if (hasUncompressedSize) { 985 final long size = z64.getSize().getLongValue(); 986 if (size < 0) { 987 throw new IOException("broken archive, entry with negative size"); 988 } 989 ze.setSize(size); 990 } else if (hasCompressedSize) { 991 z64.setSize(new ZipEightByteInteger(ze.getSize())); 992 } 993 994 if (hasCompressedSize) { 995 final long size = z64.getCompressedSize().getLongValue(); 996 if (size < 0) { 997 throw new IOException("broken archive, entry with negative compressed size"); 998 } 999 ze.setCompressedSize(size); 1000 } else if (hasUncompressedSize) { 1001 
z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 1002 } 1003 1004 if (hasRelativeHeaderOffset) { 1005 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1006 } 1007 1008 if (hasDiskStart) { 1009 ze.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1010 } 1011 } 1012 } 1013 1014 /** 1015 * Length of the "End of central directory record" - which is 1016 * supposed to be the last structure of the archive - without file 1017 * comment. 1018 */ 1019 static final int MIN_EOCD_SIZE = 1020 /* end of central dir signature */ WORD 1021 /* number of this disk */ + SHORT 1022 /* number of the disk with the */ 1023 /* start of the central directory */ + SHORT 1024 /* total number of entries in */ 1025 /* the central dir on this disk */ + SHORT 1026 /* total number of entries in */ 1027 /* the central dir */ + SHORT 1028 /* size of the central directory */ + WORD 1029 /* offset of start of central */ 1030 /* directory with respect to */ 1031 /* the starting disk number */ + WORD 1032 /* zipfile comment length */ + SHORT; 1033 1034 /** 1035 * Maximum length of the "End of central directory record" with a 1036 * file comment. 1037 */ 1038 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 1039 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 1040 1041 /** 1042 * Offset of the field that holds the location of the first 1043 * central directory entry inside the "End of central directory 1044 * record" relative to the start of the "End of central directory 1045 * record". 
1046 */ 1047 private static final int CFD_LOCATOR_OFFSET = 1048 /* end of central dir signature */ WORD 1049 /* number of this disk */ + SHORT 1050 /* number of the disk with the */ 1051 /* start of the central directory */ + SHORT 1052 /* total number of entries in */ 1053 /* the central dir on this disk */ + SHORT 1054 /* total number of entries in */ 1055 /* the central dir */ + SHORT 1056 /* size of the central directory */ + WORD; 1057 1058 /** 1059 * Offset of the field that holds the disk number of the first 1060 * central directory entry inside the "End of central directory 1061 * record" relative to the start of the "End of central directory 1062 * record". 1063 */ 1064 private static final int CFD_DISK_OFFSET = 1065 /* end of central dir signature */ WORD 1066 /* number of this disk */ + SHORT; 1067 1068 /** 1069 * Offset of the field that holds the location of the first 1070 * central directory entry inside the "End of central directory 1071 * record" relative to the "number of the disk with the start 1072 * of the central directory". 1073 */ 1074 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 1075 /* total number of entries in */ 1076 /* the central dir on this disk */ + SHORT 1077 /* total number of entries in */ 1078 /* the central dir */ + SHORT 1079 /* size of the central directory */ + WORD; 1080 1081 /** 1082 * Length of the "Zip64 end of central directory locator" - which 1083 * should be right in front of the "end of central directory 1084 * record" if one is present at all. 
1085 */ 1086 private static final int ZIP64_EOCDL_LENGTH = 1087 /* zip64 end of central dir locator sig */ WORD 1088 /* number of the disk with the start */ 1089 /* start of the zip64 end of */ 1090 /* central directory */ + WORD 1091 /* relative offset of the zip64 */ 1092 /* end of central directory record */ + DWORD 1093 /* total number of disks */ + WORD; 1094 1095 /** 1096 * Offset of the field that holds the location of the "Zip64 end 1097 * of central directory record" inside the "Zip64 end of central 1098 * directory locator" relative to the start of the "Zip64 end of 1099 * central directory locator". 1100 */ 1101 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 1102 /* zip64 end of central dir locator sig */ WORD 1103 /* number of the disk with the start */ 1104 /* start of the zip64 end of */ 1105 /* central directory */ + WORD; 1106 1107 /** 1108 * Offset of the field that holds the location of the first 1109 * central directory entry inside the "Zip64 end of central 1110 * directory record" relative to the start of the "Zip64 end of 1111 * central directory record". 1112 */ 1113 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 1114 /* zip64 end of central dir */ 1115 /* signature */ WORD 1116 /* size of zip64 end of central */ 1117 /* directory record */ + DWORD 1118 /* version made by */ + SHORT 1119 /* version needed to extract */ + SHORT 1120 /* number of this disk */ + WORD 1121 /* number of the disk with the */ 1122 /* start of the central directory */ + WORD 1123 /* total number of entries in the */ 1124 /* central directory on this disk */ + DWORD 1125 /* total number of entries in the */ 1126 /* central directory */ + DWORD 1127 /* size of the central directory */ + DWORD; 1128 1129 /** 1130 * Offset of the field that holds the disk number of the first 1131 * central directory entry inside the "Zip64 end of central 1132 * directory record" relative to the start of the "Zip64 end of 1133 * central directory record". 
1134 */ 1135 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 1136 /* zip64 end of central dir */ 1137 /* signature */ WORD 1138 /* size of zip64 end of central */ 1139 /* directory record */ + DWORD 1140 /* version made by */ + SHORT 1141 /* version needed to extract */ + SHORT 1142 /* number of this disk */ + WORD; 1143 1144 /** 1145 * Offset of the field that holds the location of the first 1146 * central directory entry inside the "Zip64 end of central 1147 * directory record" relative to the "number of the disk 1148 * with the start of the central directory". 1149 */ 1150 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 1151 /* total number of entries in the */ 1152 /* central directory on this disk */ DWORD 1153 /* total number of entries in the */ 1154 /* central directory */ + DWORD 1155 /* size of the central directory */ + DWORD; 1156 1157 /** 1158 * Searches for either the "Zip64 end of central directory 1159 * locator" or the "End of central dir record", parses 1160 * it and positions the stream at the first central directory 1161 * record. 
1162 */ 1163 private void positionAtCentralDirectory() 1164 throws IOException { 1165 positionAtEndOfCentralDirectoryRecord(); 1166 boolean found = false; 1167 final boolean searchedForZip64EOCD = 1168 archive.position() > ZIP64_EOCDL_LENGTH; 1169 if (searchedForZip64EOCD) { 1170 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1171 ((Buffer)wordBbuf).rewind(); 1172 IOUtils.readFully(archive, wordBbuf); 1173 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 1174 wordBuf); 1175 } 1176 if (!found) { 1177 // not a ZIP64 archive 1178 if (searchedForZip64EOCD) { 1179 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 1180 } 1181 positionAtCentralDirectory32(); 1182 } else { 1183 positionAtCentralDirectory64(); 1184 } 1185 } 1186 1187 /** 1188 * Parses the "Zip64 end of central directory locator", 1189 * finds the "Zip64 end of central directory record" using the 1190 * parsed information, parses that and positions the stream at the 1191 * first central directory record. 1192 * 1193 * Expects stream to be positioned right behind the "Zip64 1194 * end of central directory locator"'s signature. 
1195 */ 1196 private void positionAtCentralDirectory64() 1197 throws IOException { 1198 if (isSplitZipArchive) { 1199 ((Buffer)wordBbuf).rewind(); 1200 IOUtils.readFully(archive, wordBbuf); 1201 final long diskNumberOfEOCD = ZipLong.getValue(wordBuf); 1202 1203 ((Buffer)dwordBbuf).rewind(); 1204 IOUtils.readFully(archive, dwordBbuf); 1205 final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf); 1206 ((ZipSplitReadOnlySeekableByteChannel) archive) 1207 .position(diskNumberOfEOCD, relativeOffsetOfEOCD); 1208 } else { 1209 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 1210 - WORD /* signature has already been read */); 1211 ((Buffer)dwordBbuf).rewind(); 1212 IOUtils.readFully(archive, dwordBbuf); 1213 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 1214 } 1215 1216 ((Buffer)wordBbuf).rewind(); 1217 IOUtils.readFully(archive, wordBbuf); 1218 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 1219 throw new ZipException("Archive's ZIP64 end of central " 1220 + "directory locator is corrupt."); 1221 } 1222 1223 if (isSplitZipArchive) { 1224 skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET 1225 - WORD /* signature has already been read */); 1226 ((Buffer)wordBbuf).rewind(); 1227 IOUtils.readFully(archive, wordBbuf); 1228 centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf); 1229 1230 skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET); 1231 1232 ((Buffer)dwordBbuf).rewind(); 1233 IOUtils.readFully(archive, dwordBbuf); 1234 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1235 ((ZipSplitReadOnlySeekableByteChannel) archive) 1236 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1237 } else { 1238 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 1239 - WORD /* signature has already been read */); 1240 ((Buffer)dwordBbuf).rewind(); 1241 IOUtils.readFully(archive, dwordBbuf); 1242 centralDirectoryStartDiskNumber = 0; 1243 centralDirectoryStartRelativeOffset = 
ZipEightByteInteger.getLongValue(dwordBuf); 1244 archive.position(centralDirectoryStartRelativeOffset); 1245 } 1246 } 1247 1248 /** 1249 * Parses the "End of central dir record" and positions 1250 * the stream at the first central directory record. 1251 * 1252 * Expects stream to be positioned at the beginning of the 1253 * "End of central dir record". 1254 */ 1255 private void positionAtCentralDirectory32() 1256 throws IOException { 1257 if (isSplitZipArchive) { 1258 skipBytes(CFD_DISK_OFFSET); 1259 ((Buffer)shortBbuf).rewind(); 1260 IOUtils.readFully(archive, shortBbuf); 1261 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1262 1263 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1264 1265 ((Buffer)wordBbuf).rewind(); 1266 IOUtils.readFully(archive, wordBbuf); 1267 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1268 ((ZipSplitReadOnlySeekableByteChannel) archive) 1269 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1270 } else { 1271 skipBytes(CFD_LOCATOR_OFFSET); 1272 ((Buffer)wordBbuf).rewind(); 1273 IOUtils.readFully(archive, wordBbuf); 1274 centralDirectoryStartDiskNumber = 0; 1275 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1276 archive.position(centralDirectoryStartRelativeOffset); 1277 } 1278 } 1279 1280 /** 1281 * Searches for the and positions the stream at the start of the 1282 * "End of central dir record". 1283 */ 1284 private void positionAtEndOfCentralDirectoryRecord() 1285 throws IOException { 1286 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 1287 ZipArchiveOutputStream.EOCD_SIG); 1288 if (!found) { 1289 throw new ZipException("Archive is not a ZIP archive"); 1290 } 1291 } 1292 1293 /** 1294 * Searches the archive backwards from minDistance to maxDistance 1295 * for the given signature, positions the RandomaccessFile right 1296 * at the signature if it has been found. 
1297 */ 1298 private boolean tryToLocateSignature(final long minDistanceFromEnd, 1299 final long maxDistanceFromEnd, 1300 final byte[] sig) throws IOException { 1301 boolean found = false; 1302 long off = archive.size() - minDistanceFromEnd; 1303 final long stopSearching = 1304 Math.max(0L, archive.size() - maxDistanceFromEnd); 1305 if (off >= 0) { 1306 for (; off >= stopSearching; off--) { 1307 archive.position(off); 1308 try { 1309 ((Buffer)wordBbuf).rewind(); 1310 IOUtils.readFully(archive, wordBbuf); 1311 ((Buffer)wordBbuf).flip(); 1312 } catch (final EOFException ex) { // NOSONAR 1313 break; 1314 } 1315 int curr = wordBbuf.get(); 1316 if (curr == sig[POS_0]) { 1317 curr = wordBbuf.get(); 1318 if (curr == sig[POS_1]) { 1319 curr = wordBbuf.get(); 1320 if (curr == sig[POS_2]) { 1321 curr = wordBbuf.get(); 1322 if (curr == sig[POS_3]) { 1323 found = true; 1324 break; 1325 } 1326 } 1327 } 1328 } 1329 } 1330 } 1331 if (found) { 1332 archive.position(off); 1333 } 1334 return found; 1335 } 1336 1337 /** 1338 * Skips the given number of bytes or throws an EOFException if 1339 * skipping failed. 1340 */ 1341 private void skipBytes(final int count) throws IOException { 1342 final long currentPosition = archive.position(); 1343 final long newPosition = currentPosition + count; 1344 if (newPosition > archive.size()) { 1345 throw new EOFException(); 1346 } 1347 archive.position(newPosition); 1348 } 1349 1350 /** 1351 * Number of bytes in local file header up to the "length of 1352 * file name" entry. 
1353 */ 1354 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1355 /* local file header signature */ WORD 1356 /* version needed to extract */ + SHORT 1357 /* general purpose bit flag */ + SHORT 1358 /* compression method */ + SHORT 1359 /* last mod file time */ + SHORT 1360 /* last mod file date */ + SHORT 1361 /* crc-32 */ + WORD 1362 /* compressed size */ + WORD 1363 /* uncompressed size */ + (long) WORD; 1364 1365 /** 1366 * Walks through all recorded entries and adds the data available 1367 * from the local file header. 1368 * 1369 * <p>Also records the offsets for the data to read from the 1370 * entries.</p> 1371 */ 1372 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1373 entriesWithoutUTF8Flag) 1374 throws IOException { 1375 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1376 // entries is filled in populateFromCentralDirectory and 1377 // never modified 1378 final Entry ze = (Entry) zipArchiveEntry; 1379 final int[] lens = setDataOffset(ze); 1380 final int fileNameLen = lens[0]; 1381 final int extraFieldLen = lens[1]; 1382 skipBytes(fileNameLen); 1383 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1384 if (localExtraData.length < extraFieldLen) { 1385 throw new EOFException(); 1386 } 1387 try { 1388 ze.setExtra(localExtraData); 1389 } catch (RuntimeException ex) { 1390 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1391 z.initCause(ex); 1392 throw z; 1393 } 1394 1395 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1396 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1397 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1398 nc.comment); 1399 } 1400 } 1401 } 1402 1403 private void fillNameMap() { 1404 entries.forEach(ze -> { 1405 // entries is filled in populateFromCentralDirectory and 1406 // never modified 1407 final String name = ze.getName(); 1408 LinkedList<ZipArchiveEntry> entriesOfThatName = 
nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 1409 entriesOfThatName.addLast(ze); 1410 }); 1411 } 1412 1413 private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException { 1414 long offset = ze.getLocalHeaderOffset(); 1415 if (isSplitZipArchive) { 1416 ((ZipSplitReadOnlySeekableByteChannel) archive) 1417 .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1418 // the offset should be updated to the global offset 1419 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1420 } else { 1421 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1422 } 1423 ((Buffer)wordBbuf).rewind(); 1424 IOUtils.readFully(archive, wordBbuf); 1425 ((Buffer)wordBbuf).flip(); 1426 wordBbuf.get(shortBuf); 1427 final int fileNameLen = ZipShort.getValue(shortBuf); 1428 wordBbuf.get(shortBuf); 1429 final int extraFieldLen = ZipShort.getValue(shortBuf); 1430 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1431 + SHORT + SHORT + fileNameLen + extraFieldLen); 1432 if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) { 1433 throw new IOException("data for " + ze.getName() + " overlaps with central directory."); 1434 } 1435 return new int[] { fileNameLen, extraFieldLen }; 1436 } 1437 1438 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 1439 final long s = ze.getDataOffset(); 1440 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 1441 setDataOffset(ze); 1442 return ze.getDataOffset(); 1443 } 1444 return s; 1445 } 1446 1447 /** 1448 * Checks whether the archive starts with a LFH. If it doesn't, 1449 * it may be an empty archive. 
1450 */ 1451 private boolean startsWithLocalFileHeader() throws IOException { 1452 archive.position(0); 1453 ((Buffer)wordBbuf).rewind(); 1454 IOUtils.readFully(archive, wordBbuf); 1455 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1456 } 1457 1458 /** 1459 * Creates new BoundedInputStream, according to implementation of 1460 * underlying archive channel. 1461 */ 1462 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 1463 if (start < 0 || remaining < 0 || start + remaining < start) { 1464 throw new IllegalArgumentException("Corrupted archive, stream boundaries" 1465 + " are out of range"); 1466 } 1467 return archive instanceof FileChannel ? 1468 new BoundedFileChannelInputStream(start, remaining) : 1469 new BoundedSeekableByteChannelInputStream(start, remaining, archive); 1470 } 1471 1472 /** 1473 * Lock-free implementation of BoundedInputStream. The 1474 * implementation uses positioned reads on the underlying archive 1475 * file channel and therefore performs significantly faster in 1476 * concurrent environment. 1477 */ 1478 private class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 1479 private final FileChannel archive; 1480 1481 BoundedFileChannelInputStream(final long start, final long remaining) { 1482 super(start, remaining); 1483 archive = (FileChannel) ZipFile.this.archive; 1484 } 1485 1486 @Override 1487 protected int read(final long pos, final ByteBuffer buf) throws IOException { 1488 final int read = archive.read(buf, pos); 1489 ((Buffer)buf).flip(); 1490 return read; 1491 } 1492 } 1493 1494 private static final class NameAndComment { 1495 private final byte[] name; 1496 private final byte[] comment; 1497 private NameAndComment(final byte[] name, final byte[] comment) { 1498 this.name = name; 1499 this.comment = comment; 1500 } 1501 } 1502 1503 /** 1504 * Compares two ZipArchiveEntries based on their offset within the archive. 
1505 * 1506 * <p>Won't return any meaningful results if one of the entries 1507 * isn't part of the archive at all.</p> 1508 * 1509 * @since 1.1 1510 */ 1511 private final Comparator<ZipArchiveEntry> offsetComparator = 1512 Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 1513 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 1514 1515 /** 1516 * Extends ZipArchiveEntry to store the offset within the archive. 1517 */ 1518 private static class Entry extends ZipArchiveEntry { 1519 1520 Entry() { 1521 } 1522 1523 @Override 1524 public int hashCode() { 1525 return 3 * super.hashCode() 1526 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); 1527 } 1528 1529 @Override 1530 public boolean equals(final Object other) { 1531 if (super.equals(other)) { 1532 // super.equals would return false if other were not an Entry 1533 final Entry otherEntry = (Entry) other; 1534 return getLocalHeaderOffset() 1535 == otherEntry.getLocalHeaderOffset() 1536 && super.getDataOffset() 1537 == otherEntry.getDataOffset() 1538 && super.getDiskNumberStart() 1539 == otherEntry.getDiskNumberStart(); 1540 } 1541 return false; 1542 } 1543 } 1544 1545 private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 1546 StoredStatisticsStream(final InputStream in) { 1547 super(in); 1548 } 1549 1550 @Override 1551 public long getCompressedCount() { 1552 return super.getBytesRead(); 1553 } 1554 1555 @Override 1556 public long getUncompressedCount() { 1557 return getCompressedCount(); 1558 } 1559 } 1560}