001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import static java.nio.charset.StandardCharsets.UTF_16LE; 021 022import java.io.BufferedInputStream; 023import java.io.ByteArrayInputStream; 024import java.io.Closeable; 025import java.io.DataInputStream; 026import java.io.EOFException; 027import java.io.File; 028import java.io.FilterInputStream; 029import java.io.IOException; 030import java.io.InputStream; 031import java.nio.Buffer; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.nio.CharBuffer; 035import java.nio.channels.Channels; 036import java.nio.channels.SeekableByteChannel; 037import java.nio.file.Files; 038import java.nio.file.StandardOpenOption; 039import java.util.ArrayList; 040import java.util.Arrays; 041import java.util.BitSet; 042import java.util.EnumSet; 043import java.util.LinkedHashMap; 044import java.util.LinkedList; 045import java.util.List; 046import java.util.Map; 047import java.util.Objects; 048import java.util.zip.CRC32; 049import java.util.zip.CheckedInputStream; 050 051import org.apache.commons.compress.MemoryLimitException; 052import 
org.apache.commons.compress.utils.BoundedInputStream; 053import org.apache.commons.compress.utils.ByteUtils; 054import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 055import org.apache.commons.compress.utils.IOUtils; 056import org.apache.commons.compress.utils.InputStreamStatistics; 057 058/** 059 * Reads a 7z file, using SeekableByteChannel under 060 * the covers. 061 * <p> 062 * The 7z file format is a flexible container 063 * that can contain many compression and 064 * encryption types, but at the moment 065 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 066 * are supported. 067 * <p> 068 * The format is very Windows/Intel specific, 069 * so it uses little-endian byte order, 070 * doesn't store user/group or permission bits, 071 * and represents times using NTFS timestamps 072 * (100 nanosecond units since 1 January 1601). 073 * Hence the official tools recommend against 074 * using it for backup purposes on *nix, and 075 * recommend .tar.7z or .tar.lzma or .tar.xz 076 * instead. 077 * <p> 078 * Both the header and file contents may be 079 * compressed and/or encrypted. With both 080 * encrypted, neither file names nor file 081 * contents can be read, but the use of 082 * encryption isn't plausibly deniable. 
 *
 * <p>Multi volume archives can be read by concatenating the parts in
 * correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel}
 * for example.</p>
 *
 * @NotThreadSafe
 * @since 1.6
 */
public class SevenZFile implements Closeable {
    /**
     * Number of bytes added to the header offset and pack position stored
     * in the archive before the channel is positioned there.
     */
    static final int SIGNATURE_HEADER_SIZE = 32;

    /** Archive name used by the constructors that are not given one. */
    private static final String DEFAULT_FILE_NAME = "unknown archive";

    /** Name of the archive - only used for error reporting. */
    private final String fileName;
    /** Channel the archive is read from; set to {@code null} by {@link #close}. */
    private SeekableByteChannel channel;
    /** Parsed header structure, populated once by the constructor. */
    private final Archive archive;
    /** Index into {@code archive.files} of the entry last returned by {@link #getNextEntry}, -1 before the first call. */
    private int currentEntryIndex = -1;
    private int currentFolderIndex = -1;
    private InputStream currentFolderInputStream;
    /** Private copy of the caller's password bytes; overwritten with zeros by {@link #close}. */
    private byte[] password;
    private final SevenZFileOptions options;

    // both counters are reset to zero whenever getNextEntry moves to a new entry
    private long compressedBytesReadFromCurrentEntry;
    private long uncompressedBytesReadFromCurrentEntry;

    private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>();

    // shared with SevenZOutputFile and tests, neither mutates it
    static final byte[] sevenZSignature = { //NOSONAR
        (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
    };

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final File fileName, final char[] password) throws IOException {
        this(fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive with additional options.
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException {
        // the channel is opened here, so closeOnError (true) makes sure it is closed again if the headers can't be read
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR
                fileName.getAbsolutePath(), utf16Decode(password), true, options);
    }

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final File fileName, final byte[] password) throws IOException {
        // the channel is opened here, so closeOnError (true) makes sure it is closed again if the headers can't be read
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)),
                fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        // no password: the null argument selects the char[] overload
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel,
                      final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options)
            throws IOException {
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final char[] password) throws IOException {
        this(channel, fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password,
            final SevenZFileOptions options) throws IOException {
        // closeOnError is false: the channel was supplied by the caller and is left open if reading the headers fails
        this(channel, fileName, utf16Decode(password), false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName)
        throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options)
            throws IOException {
        // no password; closeOnError is false because the channel was supplied by the caller
        this(channel, fileName, null, false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel,
                      final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final byte[] password) throws IOException {
        // closeOnError is false: the channel was supplied by the caller
        this(channel, fileName, password, false, SevenZFileOptions.DEFAULT);
    }

    /**
     * Common constructor the public constructors of this class end up in:
     * reads the archive's headers and keeps a private copy of the password.
     *
     * @param channel the channel to read
     * @param filename name of the archive
     * @param password UTF16-LE encoded password, may be {@code null}
     * @param closeOnError whether {@code channel} is closed when reading the headers throws
     * @param options the options to apply
     * @throws IOException if reading the headers fails
     */
    private SevenZFile(final SeekableByteChannel channel, final String filename,
                       final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException {
        boolean succeeded = false;
        this.channel = channel;
        this.fileName = filename;
        this.options = options;
        try {
            archive = readHeaders(password);
            if (password != null) {
                // defensive copy - close() wipes this array, the caller's array is left untouched
                this.password = Arrays.copyOf(password, password.length);
            } else {
                this.password = null;
            }
            succeeded = true;
        } finally {
            if (!succeeded && closeOnError) {
                this.channel.close();
            }
        }
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param fileName the file to read
     * @throws IOException if reading the archive fails
     */
    public SevenZFile(final File fileName) throws IOException {
        this(fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param fileName the file to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException {
        this(fileName, null, options);
    }

    /**
     * Closes the archive.
384 * @throws IOException if closing the file fails 385 */ 386 @Override 387 public void close() throws IOException { 388 if (channel != null) { 389 try { 390 channel.close(); 391 } finally { 392 channel = null; 393 if (password != null) { 394 Arrays.fill(password, (byte) 0); 395 } 396 password = null; 397 } 398 } 399 } 400 401 /** 402 * Returns the next Archive Entry in this archive. 403 * 404 * @return the next entry, 405 * or {@code null} if there are no more entries 406 * @throws IOException if the next entry could not be read 407 */ 408 public SevenZArchiveEntry getNextEntry() throws IOException { 409 if (currentEntryIndex >= archive.files.length - 1) { 410 return null; 411 } 412 ++currentEntryIndex; 413 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 414 if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { 415 entry.setName(getDefaultName()); 416 } 417 buildDecodingStream(currentEntryIndex, false); 418 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 419 return entry; 420 } 421 422 /** 423 * Returns a copy of meta-data of all archive entries. 424 * 425 * <p>This method only provides meta-data, the entries can not be 426 * used to read the contents, you still need to process all 427 * entries in order using {@link #getNextEntry} for that.</p> 428 * 429 * <p>The content methods are only available for entries that have 430 * already been reached via {@link #getNextEntry}.</p> 431 * 432 * @return a copy of meta-data of all archive entries. 
     * @since 1.11
     */
    public Iterable<SevenZArchiveEntry> getEntries() {
        // the list is a fresh instance, its elements are the entry objects held by this archive
        return new ArrayList<>(Arrays.asList(archive.files));
    }

    /**
     * Reads the 12 byte signature header (signature, version, start header
     * CRC) and then builds the {@link Archive} either from the regular start
     * header or - if the start header looks wiped and recovery is enabled in
     * the options - by searching for the end header.
     *
     * @param password UTF16-LE encoded password, may be {@code null}
     * @return the parsed archive structure
     * @throws IOException if the signature or version is not supported or the
     * headers can not be read
     */
    private Archive readHeaders(final byte[] password) throws IOException {
        final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */)
            .order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        final byte[] signature = new byte[6];
        buf.get(signature);
        if (!Arrays.equals(signature, sevenZSignature)) {
            throw new IOException("Bad 7z signature");
        }
        // 7zFormat.txt has it wrong - it's first major then minor
        final byte archiveVersionMajor = buf.get();
        final byte archiveVersionMinor = buf.get();
        if (archiveVersionMajor != 0) {
            throw new IOException(String.format("Unsupported 7z version (%d,%d)",
                                                archiveVersionMajor, archiveVersionMinor));
        }

        boolean headerLooksValid = false;  // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
        // mask turns the signed int into the unsigned 32 bit CRC value
        final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
        if (startHeaderCrc == 0) {
            // This is an indication of a corrupt header - peek the next 20 bytes
            final long currentPosition = channel.position();
            final ByteBuffer peekBuf = ByteBuffer.allocate(20);
            readFully(peekBuf);
            // only peeking: restore the position so readStartHeader sees the same 20 bytes again
            channel.position(currentPosition);
            // Header invalid if all data is 0
            while (peekBuf.hasRemaining()) {
                if (peekBuf.get()!=0) {
                    headerLooksValid = true;
                    break;
                }
            }
        } else {
            headerLooksValid = true;
        }

        if (headerLooksValid) {
            return initializeArchive(readStartHeader(startHeaderCrc), password, true);
        }
        // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
        if (options.getTryToRecoverBrokenArchives()) {
            return tryToLocateEndHeader(password);
        }
        throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the"
            + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed"
            + " prematurely.");
    }

    /**
     * Recovery path for archives without a usable start header: walks
     * backwards from the end of the channel looking for a byte that could
     * start a header and tries to parse an archive from each candidate.
     *
     * <p>The search covers at most the last MiB of the channel and never
     * reaches back to the channel's current position.</p>
     *
     * @param password UTF16-LE encoded password, may be {@code null}
     * @return the first candidate that parses and contains at least one
     * packed stream and one file
     * @throws IOException if no candidate could be parsed
     */
    private Archive tryToLocateEndHeader(final byte[] password) throws IOException {
        final ByteBuffer nidBuf = ByteBuffer.allocate(1);
        final long searchLimit = 1024L * 1024 * 1;
        // Main header, plus bytes that readStartHeader would read
        final long previousDataSize = channel.position() + 20;
        final long minPos;
        // Determine minimal position - can't start before current position
        if (channel.position() + searchLimit > channel.size()) {
            minPos = channel.position();
        } else {
            minPos = channel.size() - searchLimit;
        }
        long pos = channel.size() - 1;
        // Loop: Try from end of archive
        while (pos > minPos) {
            pos--;
            channel.position(pos);
            ((Buffer)nidBuf).rewind();
            if (channel.read(nidBuf) < 1) {
                throw new EOFException();
            }
            final int nid = nidBuf.array()[0];
            // First indicator: Byte equals one of these header identifiers
            if (nid == NID.kEncodedHeader || nid == NID.kHeader) {
                try {
                    // Try to initialize Archive structure from here
                    final StartHeader startHeader = new StartHeader();
                    // initializeArchive adds SIGNATURE_HEADER_SIZE to this offset before seeking
                    startHeader.nextHeaderOffset = pos - previousDataSize;
                    startHeader.nextHeaderSize = channel.size() - pos;
                    // there is no trustworthy CRC for a guessed header, so verification is skipped
                    final Archive result = initializeArchive(startHeader, password, false);
                    // Sanity check: There must be some data...
                    if (result.packSizes.length > 0 && result.files.length > 0) {
                        return result;
                    }
                } catch (final Exception ignore) {
                    // Wrong guess...
523 } 524 } 525 } 526 throw new IOException("Start header corrupt and unable to guess end header"); 527 } 528 529 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 530 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 531 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 532 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 533 if (verifyCrc) { 534 final long position = channel.position(); 535 CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 536 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 537 throw new IOException("Problem computing NextHeader CRC-32"); 538 } 539 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 540 throw new IOException("NextHeader CRC-32 mismatch"); 541 } 542 channel.position(position); 543 } 544 Archive archive = new Archive(); 545 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 546 readFully(buf); 547 int nid = getUnsignedByte(buf); 548 if (nid == NID.kEncodedHeader) { 549 buf = readEncodedHeader(buf, archive, password); 550 // Archive gets rebuilt with the new header 551 archive = new Archive(); 552 nid = getUnsignedByte(buf); 553 } 554 if (nid != NID.kHeader) { 555 throw new IOException("Broken or unsupported archive: no Header"); 556 } 557 readHeader(buf, archive); 558 archive.subStreamsInfo = null; 559 return archive; 560 } 561 562 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 563 final StartHeader startHeader = new StartHeader(); 564 // using Stream rather than ByteBuffer for the benefit of the 565 // built-in CRC check 566 try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( 567 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 568 startHeader.nextHeaderOffset = 
Long.reverseBytes(dataInputStream.readLong()); 569 if (startHeader.nextHeaderOffset < 0 570 || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { 571 throw new IOException("nextHeaderOffset is out of bounds"); 572 } 573 574 startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 575 final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize; 576 if (nextHeaderEnd < startHeader.nextHeaderOffset 577 || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { 578 throw new IOException("nextHeaderSize is out of bounds"); 579 } 580 581 startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 582 583 return startHeader; 584 } 585 } 586 587 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 588 final int pos = header.position(); 589 final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); 590 stats.assertValidity(options.getMaxMemoryLimitInKb()); 591 ((Buffer)header).position(pos); 592 593 int nid = getUnsignedByte(header); 594 595 if (nid == NID.kArchiveProperties) { 596 readArchiveProperties(header); 597 nid = getUnsignedByte(header); 598 } 599 600 if (nid == NID.kAdditionalStreamsInfo) { 601 throw new IOException("Additional streams unsupported"); 602 //nid = getUnsignedByte(header); 603 } 604 605 if (nid == NID.kMainStreamsInfo) { 606 readStreamsInfo(header, archive); 607 nid = getUnsignedByte(header); 608 } 609 610 if (nid == NID.kFilesInfo) { 611 readFilesInfo(header, archive); 612 nid = getUnsignedByte(header); 613 } 614 } 615 616 private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) 617 throws IOException { 618 final ArchiveStatistics stats = new ArchiveStatistics(); 619 620 int nid = getUnsignedByte(header); 621 622 if (nid == NID.kArchiveProperties) { 623 sanityCheckArchiveProperties(header); 624 nid = getUnsignedByte(header); 625 } 626 627 if (nid == 
NID.kAdditionalStreamsInfo) { 628 throw new IOException("Additional streams unsupported"); 629 //nid = getUnsignedByte(header); 630 } 631 632 if (nid == NID.kMainStreamsInfo) { 633 sanityCheckStreamsInfo(header, stats); 634 nid = getUnsignedByte(header); 635 } 636 637 if (nid == NID.kFilesInfo) { 638 sanityCheckFilesInfo(header, stats); 639 nid = getUnsignedByte(header); 640 } 641 642 if (nid != NID.kEnd) { 643 throw new IOException("Badly terminated header, found " + nid); 644 } 645 646 return stats; 647 } 648 649 private void readArchiveProperties(final ByteBuffer input) throws IOException { 650 // FIXME: the reference implementation just throws them away? 651 int nid = getUnsignedByte(input); 652 while (nid != NID.kEnd) { 653 final long propertySize = readUint64(input); 654 final byte[] property = new byte[(int)propertySize]; 655 get(input, property); 656 nid = getUnsignedByte(input); 657 } 658 } 659 660 private void sanityCheckArchiveProperties(final ByteBuffer header) 661 throws IOException { 662 int nid = getUnsignedByte(header); 663 while (nid != NID.kEnd) { 664 final int propertySize = 665 assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); 666 if (skipBytesFully(header, propertySize) < propertySize) { 667 throw new IOException("invalid property size"); 668 } 669 nid = getUnsignedByte(header); 670 } 671 } 672 673 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, 674 final byte[] password) throws IOException { 675 final int pos = header.position(); 676 final ArchiveStatistics stats = new ArchiveStatistics(); 677 sanityCheckStreamsInfo(header, stats); 678 stats.assertValidity(options.getMaxMemoryLimitInKb()); 679 ((Buffer)header).position(pos); 680 681 readStreamsInfo(header, archive); 682 683 if (archive.folders == null || archive.folders.length == 0) { 684 throw new IOException("no folders, can't read encoded header"); 685 } 686 if (archive.packSizes == null || archive.packSizes.length == 0) { 687 throw new 
IOException("no packed streams, can't read encoded header"); 688 } 689 690 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 691 final Folder folder = archive.folders[0]; 692 final int firstPackStreamIndex = 0; 693 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 694 0; 695 696 channel.position(folderOffset); 697 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, 698 archive.packSizes[firstPackStreamIndex]); 699 for (final Coder coder : folder.getOrderedCoders()) { 700 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 701 throw new IOException("Multi input/output stream coders are not yet supported"); 702 } 703 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR 704 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 705 } 706 if (folder.hasCrc) { 707 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, 708 folder.getUnpackSize(), folder.crc); 709 } 710 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 711 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 712 if (nextHeader.length < unpackSize) { 713 throw new IOException("premature end of stream"); 714 } 715 inputStreamStack.close(); 716 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 717 } 718 719 private void sanityCheckStreamsInfo(final ByteBuffer header, 720 final ArchiveStatistics stats) throws IOException { 721 int nid = getUnsignedByte(header); 722 723 if (nid == NID.kPackInfo) { 724 sanityCheckPackInfo(header, stats); 725 nid = getUnsignedByte(header); 726 } 727 728 if (nid == NID.kUnpackInfo) { 729 sanityCheckUnpackInfo(header, stats); 730 nid = getUnsignedByte(header); 731 } 732 733 if (nid == NID.kSubStreamsInfo) { 734 sanityCheckSubStreamsInfo(header, stats); 735 nid = getUnsignedByte(header); 736 } 737 738 if (nid != NID.kEnd) { 739 throw new IOException("Badly 
terminated StreamsInfo"); 740 } 741 } 742 743 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 744 int nid = getUnsignedByte(header); 745 746 if (nid == NID.kPackInfo) { 747 readPackInfo(header, archive); 748 nid = getUnsignedByte(header); 749 } 750 751 if (nid == NID.kUnpackInfo) { 752 readUnpackInfo(header, archive); 753 nid = getUnsignedByte(header); 754 } else { 755 // archive without unpack/coders info 756 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 757 } 758 759 if (nid == NID.kSubStreamsInfo) { 760 readSubStreamsInfo(header, archive); 761 nid = getUnsignedByte(header); 762 } 763 } 764 765 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 766 final long packPos = readUint64(header); 767 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() 768 || SIGNATURE_HEADER_SIZE + packPos < 0) { 769 throw new IOException("packPos (" + packPos + ") is out of range"); 770 } 771 final long numPackStreams = readUint64(header); 772 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 773 int nid = getUnsignedByte(header); 774 if (nid == NID.kSize) { 775 long totalPackSizes = 0; 776 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 777 final long packSize = readUint64(header); 778 totalPackSizes += packSize; 779 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 780 if (packSize < 0 781 || endOfPackStreams > channel.size() 782 || endOfPackStreams < packPos) { 783 throw new IOException("packSize (" + packSize + ") is out of range"); 784 } 785 } 786 nid = getUnsignedByte(header); 787 } 788 789 if (nid == NID.kCRC) { 790 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams) 791 .cardinality(); 792 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 793 throw new IOException("invalid number of CRCs in PackInfo"); 794 } 795 nid = 
getUnsignedByte(header); 796 } 797 798 if (nid != NID.kEnd) { 799 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 800 } 801 } 802 803 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 804 archive.packPos = readUint64(header); 805 final int numPackStreamsInt = (int) readUint64(header); 806 int nid = getUnsignedByte(header); 807 if (nid == NID.kSize) { 808 archive.packSizes = new long[numPackStreamsInt]; 809 for (int i = 0; i < archive.packSizes.length; i++) { 810 archive.packSizes[i] = readUint64(header); 811 } 812 nid = getUnsignedByte(header); 813 } 814 815 if (nid == NID.kCRC) { 816 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 817 archive.packCrcs = new long[numPackStreamsInt]; 818 for (int i = 0; i < numPackStreamsInt; i++) { 819 if (archive.packCrcsDefined.get(i)) { 820 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 821 } 822 } 823 824 nid = getUnsignedByte(header); 825 } 826 } 827 828 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) 829 throws IOException { 830 int nid = getUnsignedByte(header); 831 if (nid != NID.kFolder) { 832 throw new IOException("Expected kFolder, got " + nid); 833 } 834 final long numFolders = readUint64(header); 835 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 836 final int external = getUnsignedByte(header); 837 if (external != 0) { 838 throw new IOException("External unsupported"); 839 } 840 841 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 842 for (int i = 0; i < stats.numberOfFolders; i++) { 843 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 844 } 845 846 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 847 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 848 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 849 throw 
new IOException("archive doesn't contain enough packed streams"); 850 } 851 852 nid = getUnsignedByte(header); 853 if (nid != NID.kCodersUnpackSize) { 854 throw new IOException("Expected kCodersUnpackSize, got " + nid); 855 } 856 857 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 858 for (int i = 0; i < numberOfOutputStreams; i++) { 859 final long unpackSize = readUint64(header); 860 if (unpackSize < 0) { 861 throw new IllegalArgumentException("negative unpackSize"); 862 } 863 } 864 } 865 866 nid = getUnsignedByte(header); 867 if (nid == NID.kCRC) { 868 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 869 final int crcsDefined = stats.folderHasCrc.cardinality(); 870 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 871 throw new IOException("invalid number of CRCs in UnpackInfo"); 872 } 873 nid = getUnsignedByte(header); 874 } 875 876 if (nid != NID.kEnd) { 877 throw new IOException("Badly terminated UnpackInfo"); 878 } 879 } 880 881 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 882 int nid = getUnsignedByte(header); 883 final int numFoldersInt = (int) readUint64(header); 884 final Folder[] folders = new Folder[numFoldersInt]; 885 archive.folders = folders; 886 /* final int external = */ getUnsignedByte(header); 887 for (int i = 0; i < numFoldersInt; i++) { 888 folders[i] = readFolder(header); 889 } 890 891 nid = getUnsignedByte(header); 892 for (final Folder folder : folders) { 893 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 894 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 895 for (int i = 0; i < folder.totalOutputStreams; i++) { 896 folder.unpackSizes[i] = readUint64(header); 897 } 898 } 899 900 nid = getUnsignedByte(header); 901 if (nid == NID.kCRC) { 902 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 903 for (int i = 0; i < numFoldersInt; i++) { 904 if (crcsDefined.get(i)) { 905 
folders[i].hasCrc = true; 906 folders[i].crc = 0xffffFFFFL & getInt(header); 907 } else { 908 folders[i].hasCrc = false; 909 } 910 } 911 912 nid = getUnsignedByte(header); 913 } 914 } 915 916 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 917 918 int nid = getUnsignedByte(header); 919 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 920 if (nid == NID.kNumUnpackStream) { 921 for (int i = 0; i < stats.numberOfFolders; i++) { 922 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 923 } 924 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 925 nid = getUnsignedByte(header); 926 } else { 927 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 928 } 929 930 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 931 932 if (nid == NID.kSize) { 933 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 934 if (numUnpackSubStreams == 0) { 935 continue; 936 } 937 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 938 final long size = readUint64(header); 939 if (size < 0) { 940 throw new IOException("negative unpackSize"); 941 } 942 } 943 } 944 nid = getUnsignedByte(header); 945 } 946 947 int numDigests = 0; 948 if (numUnpackSubStreamsPerFolder.isEmpty()) { 949 numDigests = stats.folderHasCrc == null ? 
stats.numberOfFolders 950 : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 951 } else { 952 int folderIdx = 0; 953 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 954 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null 955 || !stats.folderHasCrc.get(folderIdx++)) { 956 numDigests += numUnpackSubStreams; 957 } 958 } 959 } 960 961 if (nid == NID.kCRC) { 962 assertFitsIntoNonNegativeInt("numDigests", numDigests); 963 final int missingCrcs = readAllOrBits(header, numDigests) 964 .cardinality(); 965 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 966 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 967 } 968 nid = getUnsignedByte(header); 969 } 970 971 if (nid != NID.kEnd) { 972 throw new IOException("Badly terminated SubStreamsInfo"); 973 } 974 } 975 976 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 977 for (final Folder folder : archive.folders) { 978 folder.numUnpackSubStreams = 1; 979 } 980 long unpackStreamsCount = archive.folders.length; 981 982 int nid = getUnsignedByte(header); 983 if (nid == NID.kNumUnpackStream) { 984 unpackStreamsCount = 0; 985 for (final Folder folder : archive.folders) { 986 final long numStreams = readUint64(header); 987 folder.numUnpackSubStreams = (int)numStreams; 988 unpackStreamsCount += numStreams; 989 } 990 nid = getUnsignedByte(header); 991 } 992 993 final int totalUnpackStreams = (int) unpackStreamsCount; 994 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 995 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 996 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 997 subStreamsInfo.crcs = new long[totalUnpackStreams]; 998 999 int nextUnpackStream = 0; 1000 for (final Folder folder : archive.folders) { 1001 if (folder.numUnpackSubStreams == 0) { 1002 continue; 1003 } 1004 long sum = 0; 1005 if (nid == NID.kSize) { 1006 for (int i = 0; i < folder.numUnpackSubStreams - 1; 
i++) { 1007 final long size = readUint64(header); 1008 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1009 sum += size; 1010 } 1011 } 1012 if (sum > folder.getUnpackSize()) { 1013 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1014 } 1015 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1016 } 1017 if (nid == NID.kSize) { 1018 nid = getUnsignedByte(header); 1019 } 1020 1021 int numDigests = 0; 1022 for (final Folder folder : archive.folders) { 1023 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1024 numDigests += folder.numUnpackSubStreams; 1025 } 1026 } 1027 1028 if (nid == NID.kCRC) { 1029 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1030 final long[] missingCrcs = new long[numDigests]; 1031 for (int i = 0; i < numDigests; i++) { 1032 if (hasMissingCrc.get(i)) { 1033 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1034 } 1035 } 1036 int nextCrc = 0; 1037 int nextMissingCrc = 0; 1038 for (final Folder folder: archive.folders) { 1039 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1040 subStreamsInfo.hasCrc.set(nextCrc, true); 1041 subStreamsInfo.crcs[nextCrc] = folder.crc; 1042 ++nextCrc; 1043 } else { 1044 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1045 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1046 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1047 ++nextCrc; 1048 ++nextMissingCrc; 1049 } 1050 } 1051 } 1052 1053 nid = getUnsignedByte(header); 1054 } 1055 1056 archive.subStreamsInfo = subStreamsInfo; 1057 } 1058 1059 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) 1060 throws IOException { 1061 1062 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1063 if (numCoders == 0) { 1064 throw new IOException("Folder without coders"); 1065 } 1066 stats.numberOfCoders += numCoders; 1067 1068 long totalOutStreams = 0; 1069 long 
totalInStreams = 0; 1070 for (int i = 0; i < numCoders; i++) { 1071 final int bits = getUnsignedByte(header); 1072 final int idSize = bits & 0xf; 1073 get(header, new byte[idSize]); 1074 1075 final boolean isSimple = (bits & 0x10) == 0; 1076 final boolean hasAttributes = (bits & 0x20) != 0; 1077 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1078 if (moreAlternativeMethods) { 1079 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1080 "The reference implementation doesn't support them either."); 1081 } 1082 1083 if (isSimple) { 1084 totalInStreams++; 1085 totalOutStreams++; 1086 } else { 1087 totalInStreams += 1088 assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1089 totalOutStreams += 1090 assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1091 } 1092 1093 if (hasAttributes) { 1094 final int propertiesSize = 1095 assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1096 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1097 throw new IOException("invalid propertiesSize in folder"); 1098 } 1099 } 1100 } 1101 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1102 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1103 stats.numberOfOutStreams += totalOutStreams; 1104 stats.numberOfInStreams += totalInStreams; 1105 1106 if (totalOutStreams == 0) { 1107 throw new IOException("Total output streams can't be 0"); 1108 } 1109 1110 final int numBindPairs = 1111 assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1112 if (totalInStreams < numBindPairs) { 1113 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1114 } 1115 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1116 for (int i = 0; i < numBindPairs; i++) { 1117 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1118 if (totalInStreams <= inIndex) { 1119 throw new 
IOException("inIndex is bigger than number of inStreams"); 1120 } 1121 inStreamsBound.set(inIndex); 1122 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1123 if (totalOutStreams <= outIndex) { 1124 throw new IOException("outIndex is bigger than number of outStreams"); 1125 } 1126 } 1127 1128 final int numPackedStreams = 1129 assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1130 1131 if (numPackedStreams == 1) { 1132 if (inStreamsBound.nextClearBit(0) == -1) { 1133 throw new IOException("Couldn't find stream's bind pair index"); 1134 } 1135 } else { 1136 for (int i = 0; i < numPackedStreams; i++) { 1137 final int packedStreamIndex = 1138 assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1139 if (packedStreamIndex >= totalInStreams) { 1140 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1141 } 1142 } 1143 } 1144 1145 return (int) totalOutStreams; 1146 } 1147 1148 private Folder readFolder(final ByteBuffer header) throws IOException { 1149 final Folder folder = new Folder(); 1150 1151 final long numCoders = readUint64(header); 1152 final Coder[] coders = new Coder[(int)numCoders]; 1153 long totalInStreams = 0; 1154 long totalOutStreams = 0; 1155 for (int i = 0; i < coders.length; i++) { 1156 coders[i] = new Coder(); 1157 final int bits = getUnsignedByte(header); 1158 final int idSize = bits & 0xf; 1159 final boolean isSimple = (bits & 0x10) == 0; 1160 final boolean hasAttributes = (bits & 0x20) != 0; 1161 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1162 1163 coders[i].decompressionMethodId = new byte[idSize]; 1164 get(header, coders[i].decompressionMethodId); 1165 if (isSimple) { 1166 coders[i].numInStreams = 1; 1167 coders[i].numOutStreams = 1; 1168 } else { 1169 coders[i].numInStreams = readUint64(header); 1170 coders[i].numOutStreams = readUint64(header); 1171 } 1172 totalInStreams += coders[i].numInStreams; 1173 totalOutStreams 
+= coders[i].numOutStreams; 1174 if (hasAttributes) { 1175 final long propertiesSize = readUint64(header); 1176 coders[i].properties = new byte[(int)propertiesSize]; 1177 get(header, coders[i].properties); 1178 } 1179 // would need to keep looping as above: 1180 if (moreAlternativeMethods) { 1181 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1182 "The reference implementation doesn't support them either."); 1183 } 1184 } 1185 folder.coders = coders; 1186 folder.totalInputStreams = totalInStreams; 1187 folder.totalOutputStreams = totalOutStreams; 1188 1189 final long numBindPairs = totalOutStreams - 1; 1190 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 1191 for (int i = 0; i < bindPairs.length; i++) { 1192 bindPairs[i] = new BindPair(); 1193 bindPairs[i].inIndex = readUint64(header); 1194 bindPairs[i].outIndex = readUint64(header); 1195 } 1196 folder.bindPairs = bindPairs; 1197 1198 final long numPackedStreams = totalInStreams - numBindPairs; 1199 final long[] packedStreams = new long[(int)numPackedStreams]; 1200 if (numPackedStreams == 1) { 1201 int i; 1202 for (i = 0; i < (int)totalInStreams; i++) { 1203 if (folder.findBindPairForInStream(i) < 0) { 1204 break; 1205 } 1206 } 1207 packedStreams[0] = i; 1208 } else { 1209 for (int i = 0; i < (int)numPackedStreams; i++) { 1210 packedStreams[i] = readUint64(header); 1211 } 1212 } 1213 folder.packedStreams = packedStreams; 1214 1215 return folder; 1216 } 1217 1218 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1219 final int areAllDefined = getUnsignedByte(header); 1220 final BitSet bits; 1221 if (areAllDefined != 0) { 1222 bits = new BitSet(size); 1223 for (int i = 0; i < size; i++) { 1224 bits.set(i, true); 1225 } 1226 } else { 1227 bits = readBits(header, size); 1228 } 1229 return bits; 1230 } 1231 1232 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1233 final BitSet bits = 
new BitSet(size); 1234 int mask = 0; 1235 int cache = 0; 1236 for (int i = 0; i < size; i++) { 1237 if (mask == 0) { 1238 mask = 0x80; 1239 cache = getUnsignedByte(header); 1240 } 1241 bits.set(i, (cache & mask) != 0); 1242 mask >>>= 1; 1243 } 1244 return bits; 1245 } 1246 1247 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1248 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1249 1250 int emptyStreams = -1; 1251 while (true) { 1252 final int propertyType = getUnsignedByte(header); 1253 if (propertyType == 0) { 1254 break; 1255 } 1256 final long size = readUint64(header); 1257 switch (propertyType) { 1258 case NID.kEmptyStream: { 1259 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1260 break; 1261 } 1262 case NID.kEmptyFile: { 1263 if (emptyStreams == -1) { 1264 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1265 } 1266 readBits(header, emptyStreams); 1267 break; 1268 } 1269 case NID.kAnti: { 1270 if (emptyStreams == -1) { 1271 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1272 } 1273 readBits(header, emptyStreams); 1274 break; 1275 } 1276 case NID.kName: { 1277 final int external = getUnsignedByte(header); 1278 if (external != 0) { 1279 throw new IOException("Not implemented"); 1280 } 1281 final int namesLength = 1282 assertFitsIntoNonNegativeInt("file names length", size - 1); 1283 if ((namesLength & 1) != 0) { 1284 throw new IOException("File names length invalid"); 1285 } 1286 1287 int filesSeen = 0; 1288 for (int i = 0; i < namesLength; i += 2) { 1289 final char c = getChar(header); 1290 if (c == 0) { 1291 filesSeen++; 1292 } 1293 } 1294 if (filesSeen != stats.numberOfEntries) { 1295 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " 1296 + stats.numberOfEntries + ")"); 1297 } 1298 break; 1299 } 1300 case NID.kCTime: { 1301 
final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1302 .cardinality(); 1303 final int external = getUnsignedByte(header); 1304 if (external != 0) { 1305 throw new IOException("Not implemented"); 1306 } 1307 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1308 throw new IOException("invalid creation dates size"); 1309 } 1310 break; 1311 } 1312 case NID.kATime: { 1313 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1314 .cardinality(); 1315 final int external = getUnsignedByte(header); 1316 if (external != 0) { 1317 throw new IOException("Not implemented"); 1318 } 1319 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1320 throw new IOException("invalid access dates size"); 1321 } 1322 break; 1323 } 1324 case NID.kMTime: { 1325 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1326 .cardinality(); 1327 final int external = getUnsignedByte(header); 1328 if (external != 0) { 1329 throw new IOException("Not implemented"); 1330 } 1331 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1332 throw new IOException("invalid modification dates size"); 1333 } 1334 break; 1335 } 1336 case NID.kWinAttributes: { 1337 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) 1338 .cardinality(); 1339 final int external = getUnsignedByte(header); 1340 if (external != 0) { 1341 throw new IOException("Not implemented"); 1342 } 1343 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1344 throw new IOException("invalid windows attributes size"); 1345 } 1346 break; 1347 } 1348 case NID.kStartPos: { 1349 throw new IOException("kStartPos is unsupported, please report"); 1350 } 1351 case NID.kDummy: { 1352 // 7z 9.20 asserts the content is all zeros and ignores the property 1353 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1354 1355 if (skipBytesFully(header, size) < size) { 1356 throw new 
IOException("Incomplete kDummy property"); 1357 } 1358 break; 1359 } 1360 1361 default: { 1362 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1363 if (skipBytesFully(header, size) < size) { 1364 throw new IOException("Incomplete property of type " + propertyType); 1365 } 1366 break; 1367 } 1368 } 1369 } 1370 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1371 } 1372 1373 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1374 final int numFilesInt = (int) readUint64(header); 1375 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1376 BitSet isEmptyStream = null; 1377 BitSet isEmptyFile = null; 1378 BitSet isAnti = null; 1379 while (true) { 1380 final int propertyType = getUnsignedByte(header); 1381 if (propertyType == 0) { 1382 break; 1383 } 1384 final long size = readUint64(header); 1385 switch (propertyType) { 1386 case NID.kEmptyStream: { 1387 isEmptyStream = readBits(header, numFilesInt); 1388 break; 1389 } 1390 case NID.kEmptyFile: { 1391 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1392 break; 1393 } 1394 case NID.kAnti: { 1395 isAnti = readBits(header, isEmptyStream.cardinality()); 1396 break; 1397 } 1398 case NID.kName: { 1399 /* final int external = */ getUnsignedByte(header); 1400 final byte[] names = new byte[(int) (size - 1)]; 1401 final int namesLength = names.length; 1402 get(header, names); 1403 int nextFile = 0; 1404 int nextName = 0; 1405 for (int i = 0; i < namesLength; i += 2) { 1406 if (names[i] == 0 && names[i + 1] == 0) { 1407 checkEntryIsInitialized(fileMap, nextFile); 1408 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1409 nextName = i + 2; 1410 nextFile++; 1411 } 1412 } 1413 if (nextName != namesLength || nextFile != numFilesInt) { 1414 throw new IOException("Error parsing file names"); 1415 } 1416 break; 1417 } 1418 case NID.kCTime: { 1419 final 
BitSet timesDefined = readAllOrBits(header, numFilesInt); 1420 /* final int external = */ getUnsignedByte(header); 1421 for (int i = 0; i < numFilesInt; i++) { 1422 checkEntryIsInitialized(fileMap, i); 1423 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1424 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1425 if (entryAtIndex.getHasCreationDate()) { 1426 entryAtIndex.setCreationDate(getLong(header)); 1427 } 1428 } 1429 break; 1430 } 1431 case NID.kATime: { 1432 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1433 /* final int external = */ getUnsignedByte(header); 1434 for (int i = 0; i < numFilesInt; i++) { 1435 checkEntryIsInitialized(fileMap, i); 1436 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1437 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1438 if (entryAtIndex.getHasAccessDate()) { 1439 entryAtIndex.setAccessDate(getLong(header)); 1440 } 1441 } 1442 break; 1443 } 1444 case NID.kMTime: { 1445 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1446 /* final int external = */ getUnsignedByte(header); 1447 for (int i = 0; i < numFilesInt; i++) { 1448 checkEntryIsInitialized(fileMap, i); 1449 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1450 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1451 if (entryAtIndex.getHasLastModifiedDate()) { 1452 entryAtIndex.setLastModifiedDate(getLong(header)); 1453 } 1454 } 1455 break; 1456 } 1457 case NID.kWinAttributes: { 1458 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1459 /* final int external = */ getUnsignedByte(header); 1460 for (int i = 0; i < numFilesInt; i++) { 1461 checkEntryIsInitialized(fileMap, i); 1462 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1463 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1464 if (entryAtIndex.getHasWindowsAttributes()) { 1465 entryAtIndex.setWindowsAttributes(getInt(header)); 1466 } 1467 } 1468 break; 1469 } 1470 case NID.kDummy: { 1471 // 7z 9.20 
asserts the content is all zeros and ignores the property 1472 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1473 1474 skipBytesFully(header, size); 1475 break; 1476 } 1477 1478 default: { 1479 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1480 skipBytesFully(header, size); 1481 break; 1482 } 1483 } 1484 } 1485 int nonEmptyFileCounter = 0; 1486 int emptyFileCounter = 0; 1487 for (int i = 0; i < numFilesInt; i++) { 1488 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1489 if (entryAtIndex == null) { 1490 continue; 1491 } 1492 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1493 if (entryAtIndex.hasStream()) { 1494 if (archive.subStreamsInfo == null) { 1495 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1496 } 1497 entryAtIndex.setDirectory(false); 1498 entryAtIndex.setAntiItem(false); 1499 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1500 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1501 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1502 if (entryAtIndex.getSize() < 0) { 1503 throw new IOException("broken archive, entry with negative size"); 1504 } 1505 ++nonEmptyFileCounter; 1506 } else { 1507 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1508 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1509 entryAtIndex.setHasCrc(false); 1510 entryAtIndex.setSize(0); 1511 ++emptyFileCounter; 1512 } 1513 } 1514 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1515 calculateStreamMap(archive); 1516 } 1517 1518 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 1519 if (archiveEntries.get(index) == null) { 1520 archiveEntries.put(index, new 
SevenZArchiveEntry()); 1521 } 1522 } 1523 1524 private void calculateStreamMap(final Archive archive) throws IOException { 1525 final StreamMap streamMap = new StreamMap(); 1526 1527 int nextFolderPackStreamIndex = 0; 1528 final int numFolders = archive.folders != null ? archive.folders.length : 0; 1529 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 1530 for (int i = 0; i < numFolders; i++) { 1531 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 1532 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 1533 } 1534 1535 long nextPackStreamOffset = 0; 1536 final int numPackSizes = archive.packSizes.length; 1537 streamMap.packStreamOffsets = new long[numPackSizes]; 1538 for (int i = 0; i < numPackSizes; i++) { 1539 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 1540 nextPackStreamOffset += archive.packSizes[i]; 1541 } 1542 1543 streamMap.folderFirstFileIndex = new int[numFolders]; 1544 streamMap.fileFolderIndex = new int[archive.files.length]; 1545 int nextFolderIndex = 0; 1546 int nextFolderUnpackStreamIndex = 0; 1547 for (int i = 0; i < archive.files.length; i++) { 1548 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 1549 streamMap.fileFolderIndex[i] = -1; 1550 continue; 1551 } 1552 if (nextFolderUnpackStreamIndex == 0) { 1553 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 1554 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 1555 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 1556 break; 1557 } 1558 } 1559 if (nextFolderIndex >= archive.folders.length) { 1560 throw new IOException("Too few folders in archive"); 1561 } 1562 } 1563 streamMap.fileFolderIndex[i] = nextFolderIndex; 1564 if (!archive.files[i].hasStream()) { 1565 continue; 1566 } 1567 ++nextFolderUnpackStreamIndex; 1568 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 1569 ++nextFolderIndex; 1570 nextFolderUnpackStreamIndex = 0; 1571 } 
1572 } 1573 1574 archive.streamMap = streamMap; 1575 } 1576 1577 /** 1578 * Build the decoding stream for the entry to be read. 1579 * This method may be called from a random access(getInputStream) or 1580 * sequential access(getNextEntry). 1581 * If this method is called from a random access, some entries may 1582 * need to be skipped(we put them to the deferredBlockStreams and 1583 * skip them when actually needed to improve the performance) 1584 * 1585 * @param entryIndex the index of the entry to be read 1586 * @param isRandomAccess is this called in a random access 1587 * @throws IOException if there are exceptions when reading the file 1588 */ 1589 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 1590 if (archive.streamMap == null) { 1591 throw new IOException("Archive doesn't contain stream information to read entries"); 1592 } 1593 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1594 if (folderIndex < 0) { 1595 deferredBlockStreams.clear(); 1596 // TODO: previously it'd return an empty stream? 1597 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 1598 return; 1599 } 1600 final SevenZArchiveEntry file = archive.files[entryIndex]; 1601 boolean isInSameFolder = false; 1602 if (currentFolderIndex == folderIndex) { 1603 // (COMPRESS-320). 1604 // The current entry is within the same (potentially opened) folder. The 1605 // previous stream has to be fully decoded before we can start reading 1606 // but don't do it eagerly -- if the user skips over the entire folder nothing 1607 // is effectively decompressed. 
1608 if (entryIndex > 0) { 1609 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 1610 } 1611 1612 // if this is called in a random access, then the content methods of previous entry may be null 1613 // the content methods should be set to methods of the first entry as it must not be null, 1614 // and the content methods would only be set if the content methods was not set 1615 if(isRandomAccess && file.getContentMethods() == null) { 1616 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 1617 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 1618 file.setContentMethods(folderFirstFile.getContentMethods()); 1619 } 1620 isInSameFolder = true; 1621 } else { 1622 currentFolderIndex = folderIndex; 1623 // We're opening a new folder. Discard any queued streams/ folder stream. 1624 reopenFolderInputStream(folderIndex, file); 1625 } 1626 1627 boolean haveSkippedEntries = false; 1628 if (isRandomAccess) { 1629 // entries will only need to be skipped if it's a random access 1630 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 1631 } 1632 1633 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 1634 // we don't need to add another entry to the deferredBlockStreams when : 1635 // 1. If this method is called in a random access and the entry index 1636 // to be read equals to the current entry index, the input stream 1637 // has already been put in the deferredBlockStreams 1638 // 2. 
If this entry has not been read(which means no entries are skipped) 1639 return; 1640 } 1641 1642 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 1643 if (file.getHasCrc()) { 1644 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 1645 } 1646 1647 deferredBlockStreams.add(fileStream); 1648 } 1649 1650 /** 1651 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1652 * 1653 * @param folderIndex the index of the folder to reopen 1654 * @param file the 7z entry to read 1655 * @throws IOException if exceptions occur when reading the 7z file 1656 */ 1657 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1658 deferredBlockStreams.clear(); 1659 if (currentFolderInputStream != null) { 1660 currentFolderInputStream.close(); 1661 currentFolderInputStream = null; 1662 } 1663 final Folder folder = archive.folders[folderIndex]; 1664 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1665 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1666 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1667 1668 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1669 } 1670 1671 /** 1672 * Skip all the entries if needed. 1673 * Entries need to be skipped when: 1674 * <p> 1675 * 1. it's a random access 1676 * 2. 
one of these 2 condition is meet : 1677 * <p> 1678 * 2.1 currentEntryIndex != entryIndex : this means there are some entries 1679 * to be skipped(currentEntryIndex < entryIndex) or the entry has already 1680 * been read(currentEntryIndex > entryIndex) 1681 * <p> 1682 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: 1683 * if the entry to be read is the current entry, but some data of it has 1684 * been read before, then we need to reopen the stream of the folder and 1685 * skip all the entries before the current entries 1686 * 1687 * @param entryIndex the entry to be read 1688 * @param isInSameFolder are the entry to be read and the current entry in the same folder 1689 * @param folderIndex the index of the folder which contains the entry 1690 * @return true if there are entries actually skipped 1691 * @throws IOException there are exceptions when skipping entries 1692 * @since 1.21 1693 */ 1694 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 1695 final SevenZArchiveEntry file = archive.files[entryIndex]; 1696 // if the entry to be read is the current entry, and the entry has not 1697 // been read yet, then there's nothing we need to do 1698 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 1699 return false; 1700 } 1701 1702 // 1. if currentEntryIndex < entryIndex : 1703 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 1704 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 1705 // this means the entry has already been read before, and we need to reopen the 1706 // stream of the folder and skip all the entries before the current entries 1707 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 1708 if (isInSameFolder) { 1709 if (currentEntryIndex < entryIndex) { 1710 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 1711 filesToSkipStartIndex = currentEntryIndex + 1; 1712 } else { 1713 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 1714 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 1715 // from the start entry of the folder again 1716 reopenFolderInputStream(folderIndex, file); 1717 } 1718 } 1719 1720 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 1721 final SevenZArchiveEntry fileToSkip = archive.files[i]; 1722 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 1723 if (fileToSkip.getHasCrc()) { 1724 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 1725 } 1726 deferredBlockStreams.add(fileStreamToSkip); 1727 1728 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 1729 fileToSkip.setContentMethods(file.getContentMethods()); 1730 } 1731 return true; 1732 } 1733 1734 /** 1735 * Find out if any data of current entry has been read or not. 1736 * This is achieved by comparing the bytes remaining to read 1737 * and the size of the file. 
1738 * 1739 * @return true if any data of current entry has been read 1740 * @since 1.21 1741 */ 1742 private boolean hasCurrentEntryBeenRead() { 1743 boolean hasCurrentEntryBeenRead = false; 1744 if (!deferredBlockStreams.isEmpty()) { 1745 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1746 // get the bytes remaining to read, and compare it with the size of 1747 // the file to figure out if the file has been read 1748 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 1749 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1750 } 1751 1752 if (currentEntryInputStream instanceof BoundedInputStream) { 1753 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1754 } 1755 } 1756 return hasCurrentEntryBeenRead; 1757 } 1758 1759 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 1760 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 1761 channel.position(folderOffset); 1762 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 1763 new BoundedSeekableByteChannelInputStream(channel, 1764 archive.packSizes[firstPackStreamIndex]))) { 1765 @Override 1766 public int read() throws IOException { 1767 final int r = in.read(); 1768 if (r >= 0) { 1769 count(1); 1770 } 1771 return r; 1772 } 1773 @Override 1774 public int read(final byte[] b) throws IOException { 1775 return read(b, 0, b.length); 1776 } 1777 @Override 1778 public int read(final byte[] b, final int off, final int len) throws IOException { 1779 if (len == 0) { 1780 return 0; 1781 } 1782 final int r = in.read(b, off, len); 1783 if (r >= 0) { 1784 count(r); 1785 } 1786 return r; 1787 } 1788 private void count(final int c) { 1789 compressedBytesReadFromCurrentEntry += c; 1790 } 
1791 }; 1792 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 1793 for (final Coder coder : folder.getOrderedCoders()) { 1794 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1795 throw new IOException("Multi input/output stream coders are not yet supported"); 1796 } 1797 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 1798 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 1799 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 1800 methods.addFirst(new SevenZMethodConfiguration(method, 1801 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 1802 } 1803 entry.setContentMethods(methods); 1804 if (folder.hasCrc) { 1805 return new CRC32VerifyingInputStream(inputStreamStack, 1806 folder.getUnpackSize(), folder.crc); 1807 } 1808 return inputStreamStack; 1809 } 1810 1811 /** 1812 * Reads a byte of data. 1813 * 1814 * @return the byte read, or -1 if end of input is reached 1815 * @throws IOException 1816 * if an I/O error has occurred 1817 */ 1818 public int read() throws IOException { 1819 final int b = getCurrentStream().read(); 1820 if (b >= 0) { 1821 uncompressedBytesReadFromCurrentEntry++; 1822 } 1823 return b; 1824 } 1825 1826 private InputStream getCurrentStream() throws IOException { 1827 if (archive.files[currentEntryIndex].getSize() == 0) { 1828 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 1829 } 1830 if (deferredBlockStreams.isEmpty()) { 1831 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 1832 } 1833 1834 while (deferredBlockStreams.size() > 1) { 1835 // In solid compression mode we need to decompress all leading folder' 1836 // streams to get access to an entry. We defer this until really needed 1837 // so that entire blocks can be skipped without wasting time for decompression. 
1838 try (final InputStream stream = deferredBlockStreams.remove(0)) { 1839 IOUtils.skip(stream, Long.MAX_VALUE); 1840 } 1841 compressedBytesReadFromCurrentEntry = 0; 1842 } 1843 1844 return deferredBlockStreams.get(0); 1845 } 1846 1847 /** 1848 * Returns an InputStream for reading the contents of the given entry. 1849 * 1850 * <p>For archives using solid compression randomly accessing 1851 * entries will be significantly slower than reading the archive 1852 * sequentially.</p> 1853 * 1854 * @param entry the entry to get the stream for. 1855 * @return a stream to read the entry from. 1856 * @throws IOException if unable to create an input stream from the zipentry 1857 * @since 1.20 1858 */ 1859 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 1860 int entryIndex = -1; 1861 for (int i = 0; i < this.archive.files.length;i++) { 1862 if (entry == this.archive.files[i]) { 1863 entryIndex = i; 1864 break; 1865 } 1866 } 1867 1868 if (entryIndex < 0) { 1869 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 1870 } 1871 1872 buildDecodingStream(entryIndex, true); 1873 currentEntryIndex = entryIndex; 1874 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1875 return getCurrentStream(); 1876 } 1877 1878 /** 1879 * Reads data into an array of bytes. 1880 * 1881 * @param b the array to write data to 1882 * @return the number of bytes read, or -1 if end of input is reached 1883 * @throws IOException 1884 * if an I/O error has occurred 1885 */ 1886 public int read(final byte[] b) throws IOException { 1887 return read(b, 0, b.length); 1888 } 1889 1890 /** 1891 * Reads data into an array of bytes. 
1892 * 1893 * @param b the array to write data to 1894 * @param off offset into the buffer to start filling at 1895 * @param len of bytes to read 1896 * @return the number of bytes read, or -1 if end of input is reached 1897 * @throws IOException 1898 * if an I/O error has occurred 1899 */ 1900 public int read(final byte[] b, final int off, final int len) throws IOException { 1901 if (len == 0) { 1902 return 0; 1903 } 1904 final int cnt = getCurrentStream().read(b, off, len); 1905 if (cnt > 0) { 1906 uncompressedBytesReadFromCurrentEntry += cnt; 1907 } 1908 return cnt; 1909 } 1910 1911 /** 1912 * Provides statistics for bytes read from the current entry. 1913 * 1914 * @return statistics for bytes read from the current entry 1915 * @since 1.17 1916 */ 1917 public InputStreamStatistics getStatisticsForCurrentEntry() { 1918 return new InputStreamStatistics() { 1919 @Override 1920 public long getCompressedCount() { 1921 return compressedBytesReadFromCurrentEntry; 1922 } 1923 @Override 1924 public long getUncompressedCount() { 1925 return uncompressedBytesReadFromCurrentEntry; 1926 } 1927 }; 1928 } 1929 1930 private static long readUint64(final ByteBuffer in) throws IOException { 1931 // long rather than int as it might get shifted beyond the range of an int 1932 final long firstByte = getUnsignedByte(in); 1933 int mask = 0x80; 1934 long value = 0; 1935 for (int i = 0; i < 8; i++) { 1936 if ((firstByte & mask) == 0) { 1937 return value | (firstByte & mask - 1) << 8 * i; 1938 } 1939 final long nextByte = getUnsignedByte(in); 1940 value |= nextByte << 8 * i; 1941 mask >>>= 1; 1942 } 1943 return value; 1944 } 1945 1946 private static char getChar(final ByteBuffer buf) throws IOException { 1947 if (buf.remaining() < 2) { 1948 throw new EOFException(); 1949 } 1950 return buf.getChar(); 1951 } 1952 1953 private static int getInt(final ByteBuffer buf) throws IOException { 1954 if (buf.remaining() < 4) { 1955 throw new EOFException(); 1956 } 1957 return buf.getInt(); 1958 } 
1959 1960 private static long getLong(final ByteBuffer buf) throws IOException { 1961 if (buf.remaining() < 8) { 1962 throw new EOFException(); 1963 } 1964 return buf.getLong(); 1965 } 1966 1967 private static void get(final ByteBuffer buf, final byte[] to) throws IOException { 1968 if (buf.remaining() < to.length) { 1969 throw new EOFException(); 1970 } 1971 buf.get(to); 1972 } 1973 1974 private static int getUnsignedByte(final ByteBuffer buf) throws IOException { 1975 if (!buf.hasRemaining()) { 1976 throw new EOFException(); 1977 } 1978 return buf.get() & 0xff; 1979 } 1980 1981 /** 1982 * Checks if the signature matches what is expected for a 7z file. 1983 * 1984 * @param signature 1985 * the bytes to check 1986 * @param length 1987 * the number of bytes to check 1988 * @return true, if this is the signature of a 7z archive. 1989 * @since 1.8 1990 */ 1991 public static boolean matches(final byte[] signature, final int length) { 1992 if (length < sevenZSignature.length) { 1993 return false; 1994 } 1995 1996 for (int i = 0; i < sevenZSignature.length; i++) { 1997 if (signature[i] != sevenZSignature[i]) { 1998 return false; 1999 } 2000 } 2001 return true; 2002 } 2003 2004 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 2005 if (bytesToSkip < 1) { 2006 return 0; 2007 } 2008 final int current = input.position(); 2009 final int maxSkip = input.remaining(); 2010 if (maxSkip < bytesToSkip) { 2011 bytesToSkip = maxSkip; 2012 } 2013 ((Buffer)input).position(current + (int) bytesToSkip); 2014 return bytesToSkip; 2015 } 2016 2017 private void readFully(final ByteBuffer buf) throws IOException { 2018 ((Buffer)buf).rewind(); 2019 IOUtils.readFully(channel, buf); 2020 ((Buffer)buf).flip(); 2021 } 2022 2023 @Override 2024 public String toString() { 2025 return archive.toString(); 2026 } 2027 2028 /** 2029 * Derives a default file name from the archive name - if known. 2030 * 2031 * <p>This implements the same heuristics the 7z tools use. 
In 2032 * 7z's case if an archive contains entries without a name - 2033 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - 2034 * then its command line and GUI tools will use this default name 2035 * when extracting the entries.</p> 2036 * 2037 * @return null if the name of the archive is unknown. Otherwise 2038 * if the name of the archive has got any extension, it is 2039 * stripped and the remainder returned. Finally if the name of the 2040 * archive hasn't got any extension then a {@code ~} character is 2041 * appended to the archive name. 2042 * 2043 * @since 1.19 2044 */ 2045 public String getDefaultName() { 2046 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 2047 return null; 2048 } 2049 2050 final String lastSegment = new File(fileName).getName(); 2051 final int dotPos = lastSegment.lastIndexOf("."); 2052 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 2053 return lastSegment.substring(0, dotPos); 2054 } 2055 return lastSegment + "~"; 2056 } 2057 2058 private static byte[] utf16Decode(final char[] chars) { 2059 if (chars == null) { 2060 return null; 2061 } 2062 final ByteBuffer encoded = UTF_16LE.encode(CharBuffer.wrap(chars)); 2063 if (encoded.hasArray()) { 2064 return encoded.array(); 2065 } 2066 final byte[] e = new byte[encoded.remaining()]; 2067 encoded.get(e); 2068 return e; 2069 } 2070 2071 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 2072 if (value > Integer.MAX_VALUE || value < 0) { 2073 throw new IOException("Cannot handle " + what + " " + value); 2074 } 2075 return (int) value; 2076 } 2077 2078 private static class ArchiveStatistics { 2079 private int numberOfPackedStreams; 2080 private long numberOfCoders; 2081 private long numberOfOutStreams; 2082 private long numberOfInStreams; 2083 private long numberOfUnpackSubStreams; 2084 private int numberOfFolders; 2085 private BitSet folderHasCrc; 2086 private int numberOfEntries; 
2087 private int numberOfEntriesWithStream; 2088 2089 @Override 2090 public String toString() { 2091 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders 2092 + " folders. Estimated size " + estimateSize()/ 1024L + " kB."; 2093 } 2094 2095 long estimateSize() { 2096 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 2097 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 2098 + numberOfFolders * folderSize() /* folders in Archive */ 2099 + numberOfCoders * coderSize() /* coders in Folder */ 2100 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 2101 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 2102 + 8L * numberOfOutStreams /* unpackSizes in Folder */ 2103 + numberOfEntries * entrySize() /* files in Archive */ 2104 + streamMapSize() 2105 ; 2106 return 2 * lowerBound /* conservative guess */; 2107 } 2108 2109 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 2110 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 2111 throw new IOException("archive with entries but no folders"); 2112 } 2113 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 2114 throw new IOException("archive doesn't contain enough substreams for entries"); 2115 } 2116 2117 final long memoryNeededInKb = estimateSize() / 1024; 2118 if (maxMemoryLimitInKb < memoryNeededInKb) { 2119 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 2120 } 2121 } 2122 2123 private long folderSize() { 2124 return 30; /* nested arrays are accounted for separately */ 2125 } 2126 2127 private long coderSize() { 2128 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 2129 + 16 2130 + 4 /* properties, guess */ 2131 ; 2132 } 2133 2134 private long bindPairSize() { 2135 return 16; 2136 } 2137 2138 private long entrySize() { 2139 return 100; /* real size depends 
on name length, everything without name is about 70 bytes */ 2140 } 2141 2142 private long streamMapSize() { 2143 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 2144 + 8 * numberOfPackedStreams /* packStreamOffsets */ 2145 + 4 * numberOfEntries /* fileFolderIndex */ 2146 ; 2147 } 2148 } 2149}