001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.tar;
019
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
043
044/**
045 * The TarFile provides random access to UNIX archives.
046 * @since 1.21
047 */
public class TarFile implements Closeable {

    /** Size of the reusable scratch buffer used when draining small entry payloads (e.g. long name data). */
    private static final int SMALL_BUFFER_SIZE = 256;

    // Reusable scratch buffer shared by reads of meta entries; no synchronization is visible,
    // so instances are presumably not meant to be used from multiple threads concurrently.
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    // The underlying channel the archive is read from; closed by close().
    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    // All entries of the archive, populated eagerly by the constructor.
    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    // Block size used to consume the padding after the final EOF record(s).
    private final int blockSize;

    // When true, illegal numeric header values map to TarArchiveEntry.UNKNOWN instead of failing.
    private final boolean lenient;

    // Size of a single record; headers are read and padding is skipped in units of this size.
    private final int recordSize;

    // Reusable buffer for reading a single record; rewound before every read (see readRecord()).
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    // True once the EOF record(s) of the archive have been consumed.
    private boolean hasHitEOF;

    /**
     * The meta-data about the current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // Sparse input streams keyed by entry name; built while parsing sparse entries
    // and consumed by BoundedTarEntryInputStream.readSparse().
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
085
086    /**
087     * Constructor for TarFile.
088     *
089     * @param content the content to use
090     * @throws IOException when reading the tar archive fails
091     */
092    public TarFile(final byte[] content) throws IOException {
093        this(new SeekableInMemoryByteChannel(content));
094    }
095
096    /**
097     * Constructor for TarFile.
098     *
099     * @param content  the content to use
100     * @param encoding the encoding to use
101     * @throws IOException when reading the tar archive fails
102     */
103    public TarFile(final byte[] content, final String encoding) throws IOException {
104        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
105    }
106
107    /**
108     * Constructor for TarFile.
109     *
110     * @param content the content to use
111     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
112     *                ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
113     *                exception instead.
114     * @throws IOException when reading the tar archive fails
115     */
116    public TarFile(final byte[] content, final boolean lenient) throws IOException {
117        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
118    }
119
120    /**
121     * Constructor for TarFile.
122     *
123     * @param archive the file of the archive to use
124     * @throws IOException when reading the tar archive fails
125     */
126    public TarFile(final File archive) throws IOException {
127        this(archive.toPath());
128    }
129
130    /**
131     * Constructor for TarFile.
132     *
133     * @param archive  the file of the archive to use
134     * @param encoding the encoding to use
135     * @throws IOException when reading the tar archive fails
136     */
137    public TarFile(final File archive, final String encoding) throws IOException {
138        this(archive.toPath(), encoding);
139    }
140
141    /**
142     * Constructor for TarFile.
143     *
144     * @param archive the file of the archive to use
145     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
146     *                ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
147     *                exception instead.
148     * @throws IOException when reading the tar archive fails
149     */
150    public TarFile(final File archive, final boolean lenient) throws IOException {
151        this(archive.toPath(), lenient);
152    }
153
154    /**
155     * Constructor for TarFile.
156     *
157     * @param archivePath the path of the archive to use
158     * @throws IOException when reading the tar archive fails
159     */
160    public TarFile(final Path archivePath) throws IOException {
161        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
162    }
163
164    /**
165     * Constructor for TarFile.
166     *
167     * @param archivePath the path of the archive to use
168     * @param encoding    the encoding to use
169     * @throws IOException when reading the tar archive fails
170     */
171    public TarFile(final Path archivePath, final String encoding) throws IOException {
172        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
173    }
174
175    /**
176     * Constructor for TarFile.
177     *
178     * @param archivePath the path of the archive to use
179     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be
180     *                    ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
181     *                    exception instead.
182     * @throws IOException when reading the tar archive fails
183     */
184    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
185        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
186    }
187
188    /**
189     * Constructor for TarFile.
190     *
191     * @param content the content to use
192     * @throws IOException when reading the tar archive fails
193     */
194    public TarFile(final SeekableByteChannel content) throws IOException {
195        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
196    }
197
198    /**
199     * Constructor for TarFile.
200     *
201     * @param archive    the seekable byte channel to use
202     * @param blockSize  the blocks size to use
203     * @param recordSize the record size to use
204     * @param encoding   the encoding to use
205     * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be
206     *                   ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
207     *                   exception instead.
208     * @throws IOException when reading the tar archive fails
209     */
210    public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient) throws IOException {
211        this.archive = archive;
212        this.hasHitEOF = false;
213        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
214        this.recordSize = recordSize;
215        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
216        this.blockSize = blockSize;
217        this.lenient = lenient;
218
219        TarArchiveEntry entry;
220        while ((entry = getNextTarEntry()) != null) {
221            entries.add(entry);
222        }
223    }
224
225    /**
226     * Get the next entry in this tar archive. This will skip
227     * to the end of the current entry, if there is one, and
228     * place the position of the channel at the header of the
229     * next entry, and read the header and instantiate a new
230     * TarEntry from the header bytes and return that entry.
231     * If there are no more entries in the archive, null will
232     * be returned to indicate that the end of the archive has
233     * been reached.
234     *
235     * @return The next TarEntry in the archive, or null if there is no next entry.
236     * @throws IOException when reading the next TarEntry fails
237     */
238    private TarArchiveEntry getNextTarEntry() throws IOException {
239        if (isAtEOF()) {
240            return null;
241        }
242
243        if (currEntry != null) {
244            // Skip to the end of the entry
245            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
246            throwExceptionIfPositionIsNotInArchive();
247            skipRecordPadding();
248        }
249
250        final ByteBuffer headerBuf = getRecord();
251        if (null == headerBuf) {
252            /* hit EOF */
253            currEntry = null;
254            return null;
255        }
256
257        try {
258            final long position = archive.position();
259            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
260        } catch (final IllegalArgumentException e) {
261            throw new IOException("Error detected parsing the header", e);
262        }
263
264        if (currEntry.isGNULongLinkEntry()) {
265            final byte[] longLinkData = getLongNameData();
266            if (longLinkData == null) {
267                // Bugzilla: 40334
268                // Malformed tar file - long link entry name not followed by
269                // entry
270                return null;
271            }
272            currEntry.setLinkName(zipEncoding.decode(longLinkData));
273        }
274
275        if (currEntry.isGNULongNameEntry()) {
276            final byte[] longNameData = getLongNameData();
277            if (longNameData == null) {
278                // Bugzilla: 40334
279                // Malformed tar file - long entry name not followed by
280                // entry
281                return null;
282            }
283
284            // COMPRESS-509 : the name of directories should end with '/'
285            final String name = zipEncoding.decode(longNameData);
286            currEntry.setName(name);
287            if (currEntry.isDirectory() && !name.endsWith("/")) {
288                currEntry.setName(name + "/");
289            }
290        }
291
292        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
293            readGlobalPaxHeaders();
294        }
295
296        try {
297            if (currEntry.isPaxHeader()) { // Process Pax headers
298                paxHeaders();
299            } else if (!globalPaxHeaders.isEmpty()) {
300                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
301            }
302        } catch (NumberFormatException e) {
303            throw new IOException("Error detected parsing the pax header", e);
304        }
305
306        if (currEntry.isOldGNUSparse()) { // Process sparse files
307            readOldGNUSparse();
308        }
309
310        return currEntry;
311    }
312
313    /**
314     * Adds the sparse chunks from the current entry to the sparse chunks,
315     * including any additional sparse entries following the current entry.
316     *
317     * @throws IOException when reading the sparse entry fails
318     */
319    private void readOldGNUSparse() throws IOException {
320        if (currEntry.isExtended()) {
321            TarArchiveSparseEntry entry;
322            do {
323                final ByteBuffer headerBuf = getRecord();
324                if (headerBuf == null) {
325                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
326                }
327                entry = new TarArchiveSparseEntry(headerBuf.array());
328                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
329                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
330            } while (entry.isExtended());
331        }
332
333        // sparse headers are all done reading, we need to build
334        // sparse input streams using these sparse headers
335        buildSparseInputStreams();
336    }
337
338    /**
339     * Build the input streams consisting of all-zero input streams and non-zero input streams.
340     * When reading from the non-zero input streams, the data is actually read from the original input stream.
341     * The size of each input stream is introduced by the sparse headers.
342     *
343     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
344     *        0 size input streams because they are meaningless.
345     */
346    private void buildSparseInputStreams() throws IOException {
347        final List<InputStream> streams = new ArrayList<>();
348
349        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
350
351        // Stream doesn't need to be closed at all as it doesn't use any resources
352        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
353        // logical offset into the extracted entry
354        long offset = 0;
355        long numberOfZeroBytesInSparseEntry = 0;
356        for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
357            final long zeroBlockSize = sparseHeader.getOffset() - offset;
358            if (zeroBlockSize < 0) {
359                // sparse header says to move backwards inside of the extracted entry
360                throw new IOException("Corrupted struct sparse detected");
361            }
362
363            // only store the zero block if it is not empty
364            if (zeroBlockSize > 0) {
365                streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
366                numberOfZeroBytesInSparseEntry += zeroBlockSize;
367            }
368
369            // only store the input streams with non-zero size
370            if (sparseHeader.getNumbytes() > 0) {
371                final long start =
372                    currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
373                if (start + sparseHeader.getNumbytes() < start) {
374                    // possible integer overflow
375                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
376                }
377                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
378            }
379
380            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
381        }
382
383        sparseInputStreams.put(currEntry.getName(), streams);
384    }
385
386    /**
387     * Update the current entry with the read pax headers
388     * @param headers Headers read from the pax header
389     * @param sparseHeaders Sparse headers read from pax header
390     */
391    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
392        throws IOException {
393        currEntry.updateEntryFromPaxHeaders(headers);
394        currEntry.setSparseHeaders(sparseHeaders);
395    }
396
397    /**
398     * <p>
399     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
400     * may appear multi times, and they look like:
401     * <pre>
402     * GNU.sparse.size=size
403     * GNU.sparse.numblocks=numblocks
404     * repeat numblocks times
405     *   GNU.sparse.offset=offset
406     *   GNU.sparse.numbytes=numbytes
407     * end repeat
408     * </pre>
409     *
410     * <p>
411     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
412     * <pre>
413     * GNU.sparse.map
414     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
415     * </pre>
416     *
417     * <p>
418     * For PAX Format 1.X:
419     * <br>
420     * The sparse map itself is stored in the file data block, preceding the actual file data.
421     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
422     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
423     * giving the offset and size of the data block it describes.
424     * @throws IOException
425     */
426    private void paxHeaders() throws IOException {
427        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
428        final Map<String, String> headers;
429        try (final InputStream input = getInputStream(currEntry)) {
430            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
431        }
432
433        // for 0.1 PAX Headers
434        if (headers.containsKey("GNU.sparse.map")) {
435            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
436        }
437        getNextTarEntry(); // Get the actual file entry
438        if (currEntry == null) {
439            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
440        }
441        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
442
443        // for 1.0 PAX Format, the sparse map is stored in the file data block
444        if (currEntry.isPaxGNU1XSparse()) {
445            try (final InputStream input = getInputStream(currEntry)) {
446                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
447            }
448            currEntry.setSparseHeaders(sparseHeaders);
449            // data of the entry is after the pax gnu entry. So we need to update the data position once again
450            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
451        }
452
453        // sparse headers are all done reading, we need to build
454        // sparse input streams using these sparse headers
455        buildSparseInputStreams();
456    }
457
    /**
     * Reads a global PAX header entry, replacing {@code globalPaxHeaders} with the
     * parsed result, and advances to the entry the header applies to.
     *
     * @throws IOException if parsing fails or no entry follows the global header
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders,
                currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }
469
470    /**
471     * Get the next entry in this tar archive as longname data.
472     *
473     * @return The next entry in the archive as longname data, or null.
474     * @throws IOException on error
475     */
476    private byte[] getLongNameData() throws IOException {
477        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
478        int length;
479        try (final InputStream in = getInputStream(currEntry)) {
480            while ((length = in.read(smallBuf)) >= 0) {
481                longName.write(smallBuf, 0, length);
482            }
483        }
484        getNextTarEntry();
485        if (currEntry == null) {
486            // Bugzilla: 40334
487            // Malformed tar file - long entry name not followed by entry
488            return null;
489        }
490        byte[] longNameData = longName.toByteArray();
491        // remove trailing null terminator(s)
492        length = longNameData.length;
493        while (length > 0 && longNameData[length - 1] == 0) {
494            --length;
495        }
496        if (length != longNameData.length) {
497            final byte[] l = new byte[length];
498            System.arraycopy(longNameData, 0, l, 0, length);
499            longNameData = l;
500        }
501        return longNameData;
502    }
503
504    /**
505     * The last record block should be written at the full size, so skip any
506     * additional space used to fill a record after an entry
507     *
508     * @throws IOException when skipping the padding of the record fails
509     */
510    private void skipRecordPadding() throws IOException {
511        if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
512            final long numRecords = (currEntry.getSize() / recordSize) + 1;
513            final long padding = (numRecords * recordSize) - currEntry.getSize();
514            repositionForwardBy(padding);
515            throwExceptionIfPositionIsNotInArchive();
516        }
517    }
518
    /**
     * Moves the archive position forward to the given absolute position.
     *
     * @param newPosition the absolute position to move to; must not be before the current position
     * @throws IOException if the new position would move backwards inside the archive
     */
    private void repositionForwardTo(final long newPosition) throws IOException {
        final long currPosition = archive.position();
        if (newPosition < currPosition) {
            throw new IOException("trying to move backwards inside of the archive");
        }
        archive.position(newPosition);
    }

    /**
     * Moves the archive position forward by the given number of bytes.
     *
     * @param offset the number of bytes to skip; negative values cause an exception
     * @throws IOException if the resulting position would be before the current one
     */
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }
530
531    /**
532     * Checks if the current position of the SeekableByteChannel is in the archive.
533     * @throws IOException If the position is not in the archive
534     */
535    private void throwExceptionIfPositionIsNotInArchive() throws IOException {
536        if (archive.size() < archive.position()) {
537            throw new IOException("Truncated TAR archive");
538        }
539    }
540
541    /**
542     * Get the next record in this tar archive. This will skip
543     * over any remaining data in the current entry, if there
544     * is one, and place the input stream at the header of the
545     * next entry.
546     *
547     * <p>If there are no more entries in the archive, null will be
548     * returned to indicate that the end of the archive has been
549     * reached.  At the same time the {@code hasHitEOF} marker will be
550     * set to true.</p>
551     *
552     * @return The next TarEntry in the archive, or null if there is no next entry.
553     * @throws IOException when reading the next TarEntry fails
554     */
555    private ByteBuffer getRecord() throws IOException {
556        ByteBuffer headerBuf = readRecord();
557        setAtEOF(isEOFRecord(headerBuf));
558        if (isAtEOF() && headerBuf != null) {
559            // Consume rest
560            tryToConsumeSecondEOFRecord();
561            consumeRemainderOfLastBlock();
562            headerBuf = null;
563        }
564        return headerBuf;
565    }
566
567    /**
568     * Tries to read the next record resetting the position in the
569     * archive if it is not a EOF record.
570     *
571     * <p>This is meant to protect against cases where a tar
572     * implementation has written only one EOF record when two are
573     * expected. Actually this won't help since a non-conforming
574     * implementation likely won't fill full blocks consisting of - by
575     * default - ten records either so we probably have already read
576     * beyond the archive anyway.</p>
577     *
578     * @throws IOException if reading the record of resetting the position in the archive fails
579     */
580    private void tryToConsumeSecondEOFRecord() throws IOException {
581        boolean shouldReset = true;
582        try {
583            shouldReset = !isEOFRecord(readRecord());
584        } finally {
585            if (shouldReset) {
586                archive.position(archive.position() - recordSize);
587            }
588        }
589    }
590
591    /**
592     * This method is invoked once the end of the archive is hit, it
593     * tries to consume the remaining bytes under the assumption that
594     * the tool creating this archive has padded the last block.
595     */
596    private void consumeRemainderOfLastBlock() throws IOException {
597        final long bytesReadOfLastBlock = archive.position() % blockSize;
598        if (bytesReadOfLastBlock > 0) {
599            repositionForwardBy(blockSize - bytesReadOfLastBlock);
600        }
601    }
602
603    /**
604     * Read a record from the input stream and return the data.
605     *
606     * @return The record data or null if EOF has been hit.
607     * @throws IOException if reading from the archive fails
608     */
609    private ByteBuffer readRecord() throws IOException {
610        ((Buffer)recordBuffer).rewind();
611        final int readNow = archive.read(recordBuffer);
612        if (readNow != recordSize) {
613            return null;
614        }
615        return recordBuffer;
616    }
617
618    /**
619     * Get all TAR Archive Entries from the TarFile
620     *
621     * @return All entries from the tar file
622     */
623    public List<TarArchiveEntry> getEntries() {
624        return new ArrayList<>(entries);
625    }
626
    /**
     * Checks whether the given record is an EOF record, i.e. null or all-zero.
     *
     * @param headerBuf the record to check, may be null
     * @return true if the record marks the end of the archive
     */
    private boolean isEOFRecord(final ByteBuffer headerBuf) {
        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
    }

    /**
     * Whether the end of the archive has been reached.
     *
     * @return true once the EOF record(s) have been consumed
     */
    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    /**
     * Sets the end-of-archive marker.
     *
     * @param b the new end-of-archive state
     */
    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    // Whether the entry currently being parsed is a directory.
    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }
642
643    /**
644     * Gets the input stream for the provided Tar Archive Entry.
645     * @param entry Entry to get the input stream from
646     * @return Input stream of the provided entry
647     * @throws IOException Corrupted TAR archive. Can't read entry.
648     */
649    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
650        try {
651            return new BoundedTarEntryInputStream(entry, archive);
652        } catch (RuntimeException ex) {
653            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
654        }
655    }
656
    /**
     * Closes the underlying archive channel.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }
661
    /**
     * Input stream for a single entry, bounded to the entry's data region of the
     * archive channel. For sparse entries the logical content is reassembled from
     * the pre-built stream chain in {@code sparseInputStreams}.
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        // The archive channel the entry data is read from.
        private final SeekableByteChannel channel;

        // The entry this stream exposes.
        private final TarArchiveEntry entry;

        // Number of logical bytes of the entry handed out so far.
        private long entryOffset;

        // Index into the entry's sparse input stream chain (sparse entries only).
        private int currentSparseInputStreamIndex;

        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // reject entries whose declared data region extends beyond the channel
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        /**
         * Reads entry data into {@code buf}.
         *
         * @param pos the physical archive position to read from (used for non-sparse entries)
         * @param buf the buffer to fill up to its limit
         * @return the number of bytes read, or -1 at the end of the entry
         * @throws IOException if the archive is truncated
         */
        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                // sparse entries are addressed by logical offset through the stream chain
                totalRead = readSparse(entryOffset, buf, ((Buffer)buf).limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // no data although the entry claims more bytes remain
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                ((Buffer)buf).flip();
            }
            return totalRead;
        }

        /**
         * Reads up to {@code numToRead} bytes of a sparse entry, draining the current
         * sparse input stream and recursing into the next one as each runs out.
         *
         * @param pos logical offset into the extracted entry
         * @param buf destination buffer
         * @param numToRead maximum number of bytes to read
         * @return the number of bytes read, or -1 when all streams are exhausted
         */
        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from current input stream
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of current input stream is meet, open a new input stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the rest data of current input stream is not long enough, open a new input stream
            // and recursively call read
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // if the rest data of current input stream is enough(which means readLen == len), just return readLen
            return readLen;
        }

        // Positions the channel at pos and fills buf directly from the archive.
        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }
    }
757}