001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.tar; 019 020import java.io.ByteArrayOutputStream; 021import java.io.Closeable; 022import java.io.File; 023import java.io.IOException; 024import java.io.InputStream; 025import java.nio.Buffer; 026import java.nio.ByteBuffer; 027import java.nio.channels.SeekableByteChannel; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.util.ArrayList; 031import java.util.HashMap; 032import java.util.LinkedList; 033import java.util.List; 034import java.util.Map; 035 036import org.apache.commons.compress.archivers.zip.ZipEncoding; 037import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 038import org.apache.commons.compress.utils.ArchiveUtils; 039import org.apache.commons.compress.utils.BoundedArchiveInputStream; 040import org.apache.commons.compress.utils.BoundedInputStream; 041import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 042import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; 043 044/** 045 * The TarFile provides random access to UNIX archives. 046 * @since 1.21 047 */ 048public class TarFile implements Closeable { 049 050 private static final int SMALL_BUFFER_SIZE = 256; 051 052 private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE]; 053 054 private final SeekableByteChannel archive; 055 056 /** 057 * The encoding of the tar file 058 */ 059 private final ZipEncoding zipEncoding; 060 061 private final LinkedList<TarArchiveEntry> entries = new LinkedList<>(); 062 063 private final int blockSize; 064 065 private final boolean lenient; 066 067 private final int recordSize; 068 069 private final ByteBuffer recordBuffer; 070 071 // the global sparse headers, this is only used in PAX Format 0.X 072 private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>(); 073 074 private boolean hasHitEOF; 075 076 /** 077 * The meta-data about the current entry 078 */ 079 private TarArchiveEntry currEntry; 080 081 // the global PAX header 082 private Map<String, String> globalPaxHeaders = new HashMap<>(); 083 084 private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>(); 085 086 /** 087 * Constructor for TarFile. 088 * 089 * @param content the content to use 090 * @throws IOException when reading the tar archive fails 091 */ 092 public TarFile(final byte[] content) throws IOException { 093 this(new SeekableInMemoryByteChannel(content)); 094 } 095 096 /** 097 * Constructor for TarFile. 098 * 099 * @param content the content to use 100 * @param encoding the encoding to use 101 * @throws IOException when reading the tar archive fails 102 */ 103 public TarFile(final byte[] content, final String encoding) throws IOException { 104 this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false); 105 } 106 107 /** 108 * Constructor for TarFile. 109 * 110 * @param content the content to use 111 * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be 112 * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an 113 * exception instead. 114 * @throws IOException when reading the tar archive fails 115 */ 116 public TarFile(final byte[] content, final boolean lenient) throws IOException { 117 this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient); 118 } 119 120 /** 121 * Constructor for TarFile. 122 * 123 * @param archive the file of the archive to use 124 * @throws IOException when reading the tar archive fails 125 */ 126 public TarFile(final File archive) throws IOException { 127 this(archive.toPath()); 128 } 129 130 /** 131 * Constructor for TarFile. 132 * 133 * @param archive the file of the archive to use 134 * @param encoding the encoding to use 135 * @throws IOException when reading the tar archive fails 136 */ 137 public TarFile(final File archive, final String encoding) throws IOException { 138 this(archive.toPath(), encoding); 139 } 140 141 /** 142 * Constructor for TarFile. 143 * 144 * @param archive the file of the archive to use 145 * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be 146 * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an 147 * exception instead. 148 * @throws IOException when reading the tar archive fails 149 */ 150 public TarFile(final File archive, final boolean lenient) throws IOException { 151 this(archive.toPath(), lenient); 152 } 153 154 /** 155 * Constructor for TarFile. 156 * 157 * @param archivePath the path of the archive to use 158 * @throws IOException when reading the tar archive fails 159 */ 160 public TarFile(final Path archivePath) throws IOException { 161 this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false); 162 } 163 164 /** 165 * Constructor for TarFile. 166 * 167 * @param archivePath the path of the archive to use 168 * @param encoding the encoding to use 169 * @throws IOException when reading the tar archive fails 170 */ 171 public TarFile(final Path archivePath, final String encoding) throws IOException { 172 this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false); 173 } 174 175 /** 176 * Constructor for TarFile. 177 * 178 * @param archivePath the path of the archive to use 179 * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be 180 * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an 181 * exception instead. 182 * @throws IOException when reading the tar archive fails 183 */ 184 public TarFile(final Path archivePath, final boolean lenient) throws IOException { 185 this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient); 186 } 187 188 /** 189 * Constructor for TarFile. 190 * 191 * @param content the content to use 192 * @throws IOException when reading the tar archive fails 193 */ 194 public TarFile(final SeekableByteChannel content) throws IOException { 195 this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false); 196 } 197 198 /** 199 * Constructor for TarFile. 200 * 201 * @param archive the seekable byte channel to use 202 * @param blockSize the blocks size to use 203 * @param recordSize the record size to use 204 * @param encoding the encoding to use 205 * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be 206 * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an 207 * exception instead. 208 * @throws IOException when reading the tar archive fails 209 */ 210 public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient) throws IOException { 211 this.archive = archive; 212 this.hasHitEOF = false; 213 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 214 this.recordSize = recordSize; 215 this.recordBuffer = ByteBuffer.allocate(this.recordSize); 216 this.blockSize = blockSize; 217 this.lenient = lenient; 218 219 TarArchiveEntry entry; 220 while ((entry = getNextTarEntry()) != null) { 221 entries.add(entry); 222 } 223 } 224 225 /** 226 * Get the next entry in this tar archive. This will skip 227 * to the end of the current entry, if there is one, and 228 * place the position of the channel at the header of the 229 * next entry, and read the header and instantiate a new 230 * TarEntry from the header bytes and return that entry. 231 * If there are no more entries in the archive, null will 232 * be returned to indicate that the end of the archive has 233 * been reached. 234 * 235 * @return The next TarEntry in the archive, or null if there is no next entry. 236 * @throws IOException when reading the next TarEntry fails 237 */ 238 private TarArchiveEntry getNextTarEntry() throws IOException { 239 if (isAtEOF()) { 240 return null; 241 } 242 243 if (currEntry != null) { 244 // Skip to the end of the entry 245 repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize()); 246 throwExceptionIfPositionIsNotInArchive(); 247 skipRecordPadding(); 248 } 249 250 final ByteBuffer headerBuf = getRecord(); 251 if (null == headerBuf) { 252 /* hit EOF */ 253 currEntry = null; 254 return null; 255 } 256 257 try { 258 final long position = archive.position(); 259 currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position); 260 } catch (final IllegalArgumentException e) { 261 throw new IOException("Error detected parsing the header", e); 262 } 263 264 if (currEntry.isGNULongLinkEntry()) { 265 final byte[] longLinkData = getLongNameData(); 266 if (longLinkData == null) { 267 // Bugzilla: 40334 268 // Malformed tar file - long link entry name not followed by 269 // entry 270 return null; 271 } 272 currEntry.setLinkName(zipEncoding.decode(longLinkData)); 273 } 274 275 if (currEntry.isGNULongNameEntry()) { 276 final byte[] longNameData = getLongNameData(); 277 if (longNameData == null) { 278 // Bugzilla: 40334 279 // Malformed tar file - long entry name not followed by 280 // entry 281 return null; 282 } 283 284 // COMPRESS-509 : the name of directories should end with '/' 285 final String name = zipEncoding.decode(longNameData); 286 currEntry.setName(name); 287 if (currEntry.isDirectory() && !name.endsWith("/")) { 288 currEntry.setName(name + "/"); 289 } 290 } 291 292 if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers 293 readGlobalPaxHeaders(); 294 } 295 296 try { 297 if (currEntry.isPaxHeader()) { // Process Pax headers 298 paxHeaders(); 299 } else if (!globalPaxHeaders.isEmpty()) { 300 applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders); 301 } 302 } catch (NumberFormatException e) { 303 throw new IOException("Error detected parsing the pax header", e); 304 } 305 306 if (currEntry.isOldGNUSparse()) { // Process sparse files 307 readOldGNUSparse(); 308 } 309 310 return currEntry; 311 } 312 313 /** 314 * Adds the sparse chunks from the current entry to the sparse chunks, 315 * including any additional sparse entries following the current entry. 316 * 317 * @throws IOException when reading the sparse entry fails 318 */ 319 private void readOldGNUSparse() throws IOException { 320 if (currEntry.isExtended()) { 321 TarArchiveSparseEntry entry; 322 do { 323 final ByteBuffer headerBuf = getRecord(); 324 if (headerBuf == null) { 325 throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag."); 326 } 327 entry = new TarArchiveSparseEntry(headerBuf.array()); 328 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders()); 329 currEntry.setDataOffset(currEntry.getDataOffset() + recordSize); 330 } while (entry.isExtended()); 331 } 332 333 // sparse headers are all done reading, we need to build 334 // sparse input streams using these sparse headers 335 buildSparseInputStreams(); 336 } 337 338 /** 339 * Build the input streams consisting of all-zero input streams and non-zero input streams. 340 * When reading from the non-zero input streams, the data is actually read from the original input stream. 341 * The size of each input stream is introduced by the sparse headers. 342 * 343 * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 344 * 0 size input streams because they are meaningless. 345 */ 346 private void buildSparseInputStreams() throws IOException { 347 final List<InputStream> streams = new ArrayList<>(); 348 349 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders(); 350 351 // Stream doesn't need to be closed at all as it doesn't use any resources 352 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR 353 // logical offset into the extracted entry 354 long offset = 0; 355 long numberOfZeroBytesInSparseEntry = 0; 356 for (TarArchiveStructSparse sparseHeader : sparseHeaders) { 357 final long zeroBlockSize = sparseHeader.getOffset() - offset; 358 if (zeroBlockSize < 0) { 359 // sparse header says to move backwards inside of the extracted entry 360 throw new IOException("Corrupted struct sparse detected"); 361 } 362 363 // only store the zero block if it is not empty 364 if (zeroBlockSize > 0) { 365 streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize)); 366 numberOfZeroBytesInSparseEntry += zeroBlockSize; 367 } 368 369 // only store the input streams with non-zero size 370 if (sparseHeader.getNumbytes() > 0) { 371 final long start = 372 currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry; 373 if (start + sparseHeader.getNumbytes() < start) { 374 // possible integer overflow 375 throw new IOException("Unreadable TAR archive, sparse block offset or length too big"); 376 } 377 streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive)); 378 } 379 380 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); 381 } 382 383 sparseInputStreams.put(currEntry.getName(), streams); 384 } 385 386 /** 387 * Update the current entry with the read pax headers 388 * @param headers Headers read from the pax header 389 * @param sparseHeaders Sparse headers read from pax header 390 */ 391 private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) 392 throws IOException { 393 currEntry.updateEntryFromPaxHeaders(headers); 394 currEntry.setSparseHeaders(sparseHeaders); 395 } 396 397 /** 398 * <p> 399 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 400 * may appear multi times, and they look like: 401 * <pre> 402 * GNU.sparse.size=size 403 * GNU.sparse.numblocks=numblocks 404 * repeat numblocks times 405 * GNU.sparse.offset=offset 406 * GNU.sparse.numbytes=numbytes 407 * end repeat 408 * </pre> 409 * 410 * <p> 411 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 412 * <pre> 413 * GNU.sparse.map 414 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 415 * </pre> 416 * 417 * <p> 418 * For PAX Format 1.X: 419 * <br> 420 * The sparse map itself is stored in the file data block, preceding the actual file data. 421 * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. 422 * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers 423 * giving the offset and size of the data block it describes. 424 * @throws IOException 425 */ 426 private void paxHeaders() throws IOException { 427 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 428 final Map<String, String> headers; 429 try (final InputStream input = getInputStream(currEntry)) { 430 headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize()); 431 } 432 433 // for 0.1 PAX Headers 434 if (headers.containsKey("GNU.sparse.map")) { 435 sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map"))); 436 } 437 getNextTarEntry(); // Get the actual file entry 438 if (currEntry == null) { 439 throw new IOException("premature end of tar archive. Didn't find any entry after PAX header."); 440 } 441 applyPaxHeadersToCurrentEntry(headers, sparseHeaders); 442 443 // for 1.0 PAX Format, the sparse map is stored in the file data block 444 if (currEntry.isPaxGNU1XSparse()) { 445 try (final InputStream input = getInputStream(currEntry)) { 446 sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize); 447 } 448 currEntry.setSparseHeaders(sparseHeaders); 449 // data of the entry is after the pax gnu entry. So we need to update the data position once again 450 currEntry.setDataOffset(currEntry.getDataOffset() + recordSize); 451 } 452 453 // sparse headers are all done reading, we need to build 454 // sparse input streams using these sparse headers 455 buildSparseInputStreams(); 456 } 457 458 private void readGlobalPaxHeaders() throws IOException { 459 try (InputStream input = getInputStream(currEntry)) { 460 globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, 461 currEntry.getSize()); 462 } 463 getNextTarEntry(); // Get the actual file entry 464 465 if (currEntry == null) { 466 throw new IOException("Error detected parsing the pax header"); 467 } 468 } 469 470 /** 471 * Get the next entry in this tar archive as longname data. 472 * 473 * @return The next entry in the archive as longname data, or null. 474 * @throws IOException on error 475 */ 476 private byte[] getLongNameData() throws IOException { 477 final ByteArrayOutputStream longName = new ByteArrayOutputStream(); 478 int length; 479 try (final InputStream in = getInputStream(currEntry)) { 480 while ((length = in.read(smallBuf)) >= 0) { 481 longName.write(smallBuf, 0, length); 482 } 483 } 484 getNextTarEntry(); 485 if (currEntry == null) { 486 // Bugzilla: 40334 487 // Malformed tar file - long entry name not followed by entry 488 return null; 489 } 490 byte[] longNameData = longName.toByteArray(); 491 // remove trailing null terminator(s) 492 length = longNameData.length; 493 while (length > 0 && longNameData[length - 1] == 0) { 494 --length; 495 } 496 if (length != longNameData.length) { 497 final byte[] l = new byte[length]; 498 System.arraycopy(longNameData, 0, l, 0, length); 499 longNameData = l; 500 } 501 return longNameData; 502 } 503 504 /** 505 * The last record block should be written at the full size, so skip any 506 * additional space used to fill a record after an entry 507 * 508 * @throws IOException when skipping the padding of the record fails 509 */ 510 private void skipRecordPadding() throws IOException { 511 if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) { 512 final long numRecords = (currEntry.getSize() / recordSize) + 1; 513 final long padding = (numRecords * recordSize) - currEntry.getSize(); 514 repositionForwardBy(padding); 515 throwExceptionIfPositionIsNotInArchive(); 516 } 517 } 518 519 private void repositionForwardTo(final long newPosition) throws IOException { 520 final long currPosition = archive.position(); 521 if (newPosition < currPosition) { 522 throw new IOException("trying to move backwards inside of the archive"); 523 } 524 archive.position(newPosition); 525 } 526 527 private void repositionForwardBy(final long offset) throws IOException { 528 repositionForwardTo(archive.position() + offset); 529 } 530 531 /** 532 * Checks if the current position of the SeekableByteChannel is in the archive. 533 * @throws IOException If the position is not in the archive 534 */ 535 private void throwExceptionIfPositionIsNotInArchive() throws IOException { 536 if (archive.size() < archive.position()) { 537 throw new IOException("Truncated TAR archive"); 538 } 539 } 540 541 /** 542 * Get the next record in this tar archive. This will skip 543 * over any remaining data in the current entry, if there 544 * is one, and place the input stream at the header of the 545 * next entry. 546 * 547 * <p>If there are no more entries in the archive, null will be 548 * returned to indicate that the end of the archive has been 549 * reached. At the same time the {@code hasHitEOF} marker will be 550 * set to true.</p> 551 * 552 * @return The next TarEntry in the archive, or null if there is no next entry. 553 * @throws IOException when reading the next TarEntry fails 554 */ 555 private ByteBuffer getRecord() throws IOException { 556 ByteBuffer headerBuf = readRecord(); 557 setAtEOF(isEOFRecord(headerBuf)); 558 if (isAtEOF() && headerBuf != null) { 559 // Consume rest 560 tryToConsumeSecondEOFRecord(); 561 consumeRemainderOfLastBlock(); 562 headerBuf = null; 563 } 564 return headerBuf; 565 } 566 567 /** 568 * Tries to read the next record resetting the position in the 569 * archive if it is not a EOF record. 570 * 571 * <p>This is meant to protect against cases where a tar 572 * implementation has written only one EOF record when two are 573 * expected. Actually this won't help since a non-conforming 574 * implementation likely won't fill full blocks consisting of - by 575 * default - ten records either so we probably have already read 576 * beyond the archive anyway.</p> 577 * 578 * @throws IOException if reading the record of resetting the position in the archive fails 579 */ 580 private void tryToConsumeSecondEOFRecord() throws IOException { 581 boolean shouldReset = true; 582 try { 583 shouldReset = !isEOFRecord(readRecord()); 584 } finally { 585 if (shouldReset) { 586 archive.position(archive.position() - recordSize); 587 } 588 } 589 } 590 591 /** 592 * This method is invoked once the end of the archive is hit, it 593 * tries to consume the remaining bytes under the assumption that 594 * the tool creating this archive has padded the last block. 595 */ 596 private void consumeRemainderOfLastBlock() throws IOException { 597 final long bytesReadOfLastBlock = archive.position() % blockSize; 598 if (bytesReadOfLastBlock > 0) { 599 repositionForwardBy(blockSize - bytesReadOfLastBlock); 600 } 601 } 602 603 /** 604 * Read a record from the input stream and return the data. 605 * 606 * @return The record data or null if EOF has been hit. 607 * @throws IOException if reading from the archive fails 608 */ 609 private ByteBuffer readRecord() throws IOException { 610 ((Buffer)recordBuffer).rewind(); 611 final int readNow = archive.read(recordBuffer); 612 if (readNow != recordSize) { 613 return null; 614 } 615 return recordBuffer; 616 } 617 618 /** 619 * Get all TAR Archive Entries from the TarFile 620 * 621 * @return All entries from the tar file 622 */ 623 public List<TarArchiveEntry> getEntries() { 624 return new ArrayList<>(entries); 625 } 626 627 private boolean isEOFRecord(final ByteBuffer headerBuf) { 628 return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize); 629 } 630 631 protected final boolean isAtEOF() { 632 return hasHitEOF; 633 } 634 635 protected final void setAtEOF(final boolean b) { 636 hasHitEOF = b; 637 } 638 639 private boolean isDirectory() { 640 return currEntry != null && currEntry.isDirectory(); 641 } 642 643 /** 644 * Gets the input stream for the provided Tar Archive Entry. 645 * @param entry Entry to get the input stream from 646 * @return Input stream of the provided entry 647 * @throws IOException Corrupted TAR archive. Can't read entry. 648 */ 649 public InputStream getInputStream(final TarArchiveEntry entry) throws IOException { 650 try { 651 return new BoundedTarEntryInputStream(entry, archive); 652 } catch (RuntimeException ex) { 653 throw new IOException("Corrupted TAR archive. Can't read entry", ex); 654 } 655 } 656 657 @Override 658 public void close() throws IOException { 659 archive.close(); 660 } 661 662 private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream { 663 664 private final SeekableByteChannel channel; 665 666 private final TarArchiveEntry entry; 667 668 private long entryOffset; 669 670 private int currentSparseInputStreamIndex; 671 672 BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException { 673 super(entry.getDataOffset(), entry.getRealSize()); 674 if (channel.size() - entry.getSize() < entry.getDataOffset()) { 675 throw new IOException("entry size exceeds archive size"); 676 } 677 this.entry = entry; 678 this.channel = channel; 679 } 680 681 @Override 682 protected int read(final long pos, final ByteBuffer buf) throws IOException { 683 if (entryOffset >= entry.getRealSize()) { 684 return -1; 685 } 686 687 final int totalRead; 688 if (entry.isSparse()) { 689 totalRead = readSparse(entryOffset, buf, ((Buffer)buf).limit()); 690 } else { 691 totalRead = readArchive(pos, buf); 692 } 693 694 if (totalRead == -1) { 695 if (buf.array().length > 0) { 696 throw new IOException("Truncated TAR archive"); 697 } 698 setAtEOF(true); 699 } else { 700 entryOffset += totalRead; 701 ((Buffer)buf).flip(); 702 } 703 return totalRead; 704 } 705 706 private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException { 707 // if there are no actual input streams, just read from the original archive 708 final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName()); 709 if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) { 710 return readArchive(entry.getDataOffset() + pos, buf); 711 } 712 713 if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) { 714 return -1; 715 } 716 717 final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex); 718 final byte[] bufArray = new byte[numToRead]; 719 final int readLen = currentInputStream.read(bufArray); 720 if (readLen != -1) { 721 buf.put(bufArray, 0, readLen); 722 } 723 724 // if the current input stream is the last input stream, 725 // just return the number of bytes read from current input stream 726 if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) { 727 return readLen; 728 } 729 730 // if EOF of current input stream is meet, open a new input stream and recursively call read 731 if (readLen == -1) { 732 currentSparseInputStreamIndex++; 733 return readSparse(pos, buf, numToRead); 734 } 735 736 // if the rest data of current input stream is not long enough, open a new input stream 737 // and recursively call read 738 if (readLen < numToRead) { 739 currentSparseInputStreamIndex++; 740 final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen); 741 if (readLenOfNext == -1) { 742 return readLen; 743 } 744 745 return readLen + readLenOfNext; 746 } 747 748 // if the rest data of current input stream is enough(which means readLen == len), just return readLen 749 return readLen; 750 } 751 752 private int readArchive(final long pos, final ByteBuffer buf) throws IOException { 753 channel.position(pos); 754 return channel.read(buf); 755 } 756 } 757}