001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static java.nio.charset.StandardCharsets.UTF_8;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
023import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
024import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN;
025import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN;
026
027import java.io.ByteArrayOutputStream;
028import java.io.IOException;
029import java.io.InputStream;
030import java.io.UncheckedIOException;
031import java.math.BigInteger;
032import java.nio.Buffer;
033import java.nio.ByteBuffer;
034import java.nio.charset.Charset;
035import java.util.ArrayList;
036import java.util.Collections;
037import java.util.HashMap;
038import java.util.List;
039import java.util.Map;
040
041import org.apache.commons.compress.archivers.zip.ZipEncoding;
042import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
043import org.apache.commons.compress.utils.CharsetNames;
044import org.apache.commons.compress.utils.IOUtils;
045
046/**
047 * This class provides static utility methods to work with byte streams.
048 *
049 * @Immutable
050 */
051// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
052public class TarUtils {
053
054    private static final int BYTE_MASK = 255;
055
056    static final ZipEncoding DEFAULT_ENCODING =
057        ZipEncodingHelper.getZipEncoding(null);
058
059    /**
060     * Encapsulates the algorithms used up to Commons Compress 1.3 as
061     * ZipEncoding.
062     */
063    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
064            @Override
065            public boolean canEncode(final String name) { return true; }
066
067            @Override
068            public ByteBuffer encode(final String name) {
069                final int length = name.length();
070                final byte[] buf = new byte[length];
071
072                // copy until end of input or output is reached.
073                for (int i = 0; i < length; ++i) {
074                    buf[i] = (byte) name.charAt(i);
075                }
076                return ByteBuffer.wrap(buf);
077            }
078
079            @Override
080            public String decode(final byte[] buffer) {
081                final int length = buffer.length;
082                final StringBuilder result = new StringBuilder(length);
083
084                for (final byte b : buffer) {
085                    if (b == 0) { // Trailing null
086                        break;
087                    }
088                    result.append((char) (b & 0xFF)); // Allow for sign-extension
089                }
090
091                return result.toString();
092            }
093        };
094
095    /** Private constructor to prevent instantiation of this utility class. */
096    private TarUtils(){
097    }
098
099    /**
100     * Parse an octal string from a buffer.
101     *
102     * <p>Leading spaces are ignored.
103     * The buffer must contain a trailing space or NUL,
104     * and may contain an additional trailing space or NUL.</p>
105     *
106     * <p>The input buffer is allowed to contain all NULs,
107     * in which case the method returns 0L
108     * (this allows for missing fields).</p>
109     *
110     * <p>To work-around some tar implementations that insert a
111     * leading NUL this method returns 0 if it detects a leading NUL
112     * since Commons Compress 1.4.</p>
113     *
114     * @param buffer The buffer from which to parse.
115     * @param offset The offset into the buffer from which to parse.
116     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
117     * @return The long value of the octal string.
118     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
119     */
120    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
121        long result = 0;
122        int end = offset + length;
123        int start = offset;
124
125        if (length < 2) {
126            throw new IllegalArgumentException("Length " + length + " must be at least 2");
127        }
128
129        if (buffer[start] == 0) {
130            return 0L;
131        }
132
133        // Skip leading spaces
134        while (start < end) {
135            if (buffer[start] != ' ') {
136                break;
137            }
138            start++;
139        }
140
141        // Trim all trailing NULs and spaces.
142        // The ustar and POSIX tar specs require a trailing NUL or
143        // space but some implementations use the extra digit for big
144        // sizes/uids/gids ...
145        byte trailer = buffer[end - 1];
146        while (start < end && (trailer == 0 || trailer == ' ')) {
147            end--;
148            trailer = buffer[end - 1];
149        }
150
151        for (; start < end; start++) {
152            final byte currentByte = buffer[start];
153            // CheckStyle:MagicNumber OFF
154            if (currentByte < '0' || currentByte > '7') {
155                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
156            }
157            result = (result << 3) + (currentByte - '0'); // convert from ASCII
158            // CheckStyle:MagicNumber ON
159        }
160
161        return result;
162    }
163
164    /**
165     * Compute the value contained in a byte buffer.  If the most
166     * significant bit of the first byte in the buffer is set, this
167     * bit is ignored and the rest of the buffer is interpreted as a
168     * binary number.  Otherwise, the buffer is interpreted as an
169     * octal number as per the parseOctal function above.
170     *
171     * @param buffer The buffer from which to parse.
172     * @param offset The offset into the buffer from which to parse.
173     * @param length The maximum number of bytes to parse.
174     * @return The long value of the octal or binary string.
175     * @throws IllegalArgumentException if the trailing space/NUL is
176     * missing or an invalid byte is detected in an octal number, or
177     * if a binary number would exceed the size of a signed long
178     * 64-bit integer.
179     * @since 1.4
180     */
181    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
182                                          final int length) {
183
184        if ((buffer[offset] & 0x80) == 0) {
185            return parseOctal(buffer, offset, length);
186        }
187        final boolean negative = buffer[offset] == (byte) 0xff;
188        if (length < 9) {
189            return parseBinaryLong(buffer, offset, length, negative);
190        }
191        return parseBinaryBigInteger(buffer, offset, length, negative);
192    }
193
194    private static long parseBinaryLong(final byte[] buffer, final int offset,
195                                        final int length,
196                                        final boolean negative) {
197        if (length >= 9) {
198            throw new IllegalArgumentException("At offset " + offset + ", "
199                                               + length + " byte binary number"
200                                               + " exceeds maximum signed long"
201                                               + " value");
202        }
203        long val = 0;
204        for (int i = 1; i < length; i++) {
205            val = (val << 8) + (buffer[offset + i] & 0xff);
206        }
207        if (negative) {
208            // 2's complement
209            val--;
210            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
211        }
212        return negative ? -val : val;
213    }
214
215    private static long parseBinaryBigInteger(final byte[] buffer,
216                                              final int offset,
217                                              final int length,
218                                              final boolean negative) {
219        final byte[] remainder = new byte[length - 1];
220        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
221        BigInteger val = new BigInteger(remainder);
222        if (negative) {
223            // 2's complement
224            val = val.add(BigInteger.valueOf(-1)).not();
225        }
226        if (val.bitLength() > 63) {
227            throw new IllegalArgumentException("At offset " + offset + ", "
228                                               + length + " byte binary number"
229                                               + " exceeds maximum signed long"
230                                               + " value");
231        }
232        return negative ? -val.longValue() : val.longValue();
233    }
234
235    /**
236     * Parse a boolean byte from a buffer.
237     * Leading spaces and NUL are ignored.
238     * The buffer may contain trailing spaces or NULs.
239     *
240     * @param buffer The buffer from which to parse.
241     * @param offset The offset into the buffer from which to parse.
242     * @return The boolean value of the bytes.
243     * @throws IllegalArgumentException if an invalid byte is detected.
244     */
245    public static boolean parseBoolean(final byte[] buffer, final int offset) {
246        return buffer[offset] == 1;
247    }
248
249    // Helper method to generate the exception message
250    private static String exceptionMessage(final byte[] buffer, final int offset,
251            final int length, final int current, final byte currentByte) {
252        // default charset is good enough for an exception message,
253        //
254        // the alternative was to modify parseOctal and
255        // parseOctalOrBinary to receive the ZipEncoding of the
256        // archive (deprecating the existing public methods, of
257        // course) and dealing with the fact that ZipEncoding#decode
258        // can throw an IOException which parseOctal* doesn't declare
259        String string = new String(buffer, offset, length, Charset.defaultCharset());
260
261        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
262        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
263    }
264
265    /**
266     * Parse an entry name from a buffer.
267     * Parsing stops when a NUL is found
268     * or the buffer length is reached.
269     *
270     * @param buffer The buffer from which to parse.
271     * @param offset The offset into the buffer from which to parse.
272     * @param length The maximum number of bytes to parse.
273     * @return The entry name.
274     */
275    public static String parseName(final byte[] buffer, final int offset, final int length) {
276        try {
277            return parseName(buffer, offset, length, DEFAULT_ENCODING);
278        } catch (final IOException ex) { // NOSONAR
279            try {
280                return parseName(buffer, offset, length, FALLBACK_ENCODING);
281            } catch (final IOException ex2) {
282                // impossible
283                throw new UncheckedIOException(ex2); //NOSONAR
284            }
285        }
286    }
287
288    /**
289     * Parse an entry name from a buffer.
290     * Parsing stops when a NUL is found
291     * or the buffer length is reached.
292     *
293     * @param buffer The buffer from which to parse.
294     * @param offset The offset into the buffer from which to parse.
295     * @param length The maximum number of bytes to parse.
296     * @param encoding name of the encoding to use for file names
297     * @since 1.4
298     * @return The entry name.
299     * @throws IOException on error
300     */
301    public static String parseName(final byte[] buffer, final int offset,
302                                   final int length,
303                                   final ZipEncoding encoding)
304        throws IOException {
305
306        int len = 0;
307        for (int i = offset; len < length && buffer[i] != 0; i++) {
308            len++;
309        }
310        if (len > 0) {
311            final byte[] b = new byte[len];
312            System.arraycopy(buffer, offset, b, 0, len);
313            return encoding.decode(b);
314        }
315        return "";
316    }
317
318    /**
319     * Parses the content of a PAX 1.0 sparse block.
320     * @since 1.20
321     * @param buffer The buffer from which to parse.
322     * @param offset The offset into the buffer from which to parse.
323     * @return a parsed sparse struct
324     */
325    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
326        final long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN);
327        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN);
328
329        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
330    }
331
332    /**
333     * @since 1.21
334     */
335    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries)
336        throws IOException {
337        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
338        for (int i = 0; i < entries; i++) {
339            try {
340                final TarArchiveStructSparse sparseHeader =
341                    parseSparse(buffer, offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN));
342
343                if (sparseHeader.getOffset() < 0) {
344                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
345                }
346                if (sparseHeader.getNumbytes() < 0) {
347                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
348                }
349                sparseHeaders.add(sparseHeader);
350            } catch (IllegalArgumentException ex) {
351                // thrown internally by parseOctalOrBinary
352                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
353            }
354        }
355        return Collections.unmodifiableList(sparseHeaders);
356    }
357
358    /**
359     * Copy a name into a buffer.
360     * Copies characters from the name into the buffer
361     * starting at the specified offset.
362     * If the buffer is longer than the name, the buffer
363     * is filled with trailing NULs.
364     * If the name is longer than the buffer,
365     * the output is truncated.
366     *
367     * @param name The header name from which to copy the characters.
368     * @param buf The buffer where the name is to be stored.
369     * @param offset The starting offset into the buffer
370     * @param length The maximum number of header bytes to copy.
371     * @return The updated offset, i.e. offset + length
372     */
373    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
374        try {
375            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
376        } catch (final IOException ex) { // NOSONAR
377            try {
378                return formatNameBytes(name, buf, offset, length,
379                                       FALLBACK_ENCODING);
380            } catch (final IOException ex2) {
381                // impossible
382                throw new UncheckedIOException(ex2); //NOSONAR
383            }
384        }
385    }
386
387    /**
388     * Copy a name into a buffer.
389     * Copies characters from the name into the buffer
390     * starting at the specified offset.
391     * If the buffer is longer than the name, the buffer
392     * is filled with trailing NULs.
393     * If the name is longer than the buffer,
394     * the output is truncated.
395     *
396     * @param name The header name from which to copy the characters.
397     * @param buf The buffer where the name is to be stored.
398     * @param offset The starting offset into the buffer
399     * @param length The maximum number of header bytes to copy.
400     * @param encoding name of the encoding to use for file names
401     * @since 1.4
402     * @return The updated offset, i.e. offset + length
403     * @throws IOException on error
404     */
405    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
406                                      final int length,
407                                      final ZipEncoding encoding)
408        throws IOException {
409        int len = name.length();
410        ByteBuffer b = encoding.encode(name);
411        while (((Buffer)b).limit() > length && len > 0) {
412            b = encoding.encode(name.substring(0, --len));
413        }
414        final int limit = ((Buffer)b).limit() - ((Buffer)b).position();
415        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
416
417        // Pad any remaining output bytes with NUL
418        for (int i = limit; i < length; ++i) {
419            buf[offset + i] = 0;
420        }
421
422        return offset + length;
423    }
424
425    /**
426     * Fill buffer with unsigned octal number, padded with leading zeroes.
427     *
428     * @param value number to convert to octal - treated as unsigned
429     * @param buffer destination buffer
430     * @param offset starting offset in buffer
431     * @param length length of buffer to fill
432     * @throws IllegalArgumentException if the value will not fit in the buffer
433     */
434    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
435            final int offset, final int length) {
436        int remaining = length;
437        remaining--;
438        if (value == 0) {
439            buffer[offset + remaining--] = (byte) '0';
440        } else {
441            long val = value;
442            for (; remaining >= 0 && val != 0; --remaining) {
443                // CheckStyle:MagicNumber OFF
444                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
445                val = val >>> 3;
446                // CheckStyle:MagicNumber ON
447            }
448            if (val != 0){
449                throw new IllegalArgumentException
450                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
451            }
452        }
453
454        for (; remaining >= 0; --remaining) { // leading zeros
455            buffer[offset + remaining] = (byte) '0';
456        }
457    }
458
459    /**
460     * Write an octal integer into a buffer.
461     *
462     * Uses {@link #formatUnsignedOctalString} to format
463     * the value as an octal string with leading zeros.
464     * The converted number is followed by space and NUL
465     *
466     * @param value The value to write
467     * @param buf The buffer to receive the output
468     * @param offset The starting offset into the buffer
469     * @param length The size of the output buffer
470     * @return The updated offset, i.e offset+length
471     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
472     */
473    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
474
475        int idx=length-2; // For space and trailing null
476        formatUnsignedOctalString(value, buf, offset, idx);
477
478        buf[offset + idx++] = (byte) ' '; // Trailing space
479        buf[offset + idx]   = 0; // Trailing null
480
481        return offset + length;
482    }
483
484    /**
485     * Write an octal long integer into a buffer.
486     *
487     * Uses {@link #formatUnsignedOctalString} to format
488     * the value as an octal string with leading zeros.
489     * The converted number is followed by a space.
490     *
491     * @param value The value to write as octal
492     * @param buf The destinationbuffer.
493     * @param offset The starting offset into the buffer.
494     * @param length The length of the buffer
495     * @return The updated offset
496     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
497     */
498    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
499
500        final int idx=length-1; // For space
501
502        formatUnsignedOctalString(value, buf, offset, idx);
503        buf[offset + idx] = (byte) ' '; // Trailing space
504
505        return offset + length;
506    }
507
508    /**
509     * Write an long integer into a buffer as an octal string if this
510     * will fit, or as a binary number otherwise.
511     *
512     * Uses {@link #formatUnsignedOctalString} to format
513     * the value as an octal string with leading zeros.
514     * The converted number is followed by a space.
515     *
516     * @param value The value to write into the buffer.
517     * @param buf The destination buffer.
518     * @param offset The starting offset into the buffer.
519     * @param length The length of the buffer.
520     * @return The updated offset.
521     * @throws IllegalArgumentException if the value (and trailer)
522     * will not fit in the buffer.
523     * @since 1.4
524     */
525    public static int formatLongOctalOrBinaryBytes(
526        final long value, final byte[] buf, final int offset, final int length) {
527
528        // Check whether we are dealing with UID/GID or SIZE field
529        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
530
531        final boolean negative = value < 0;
532        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
533            return formatLongOctalBytes(value, buf, offset, length);
534        }
535
536        if (length < 9) {
537            formatLongBinary(value, buf, offset, length, negative);
538        } else {
539            formatBigIntegerBinary(value, buf, offset, length, negative);
540        }
541
542        buf[offset] = (byte) (negative ? 0xff : 0x80);
543        return offset + length;
544    }
545
546    private static void formatLongBinary(final long value, final byte[] buf,
547                                         final int offset, final int length,
548                                         final boolean negative) {
549        final int bits = (length - 1) * 8;
550        final long max = 1L << bits;
551        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
552        if (val < 0 || val >= max) {
553            throw new IllegalArgumentException("Value " + value +
554                " is too large for " + length + " byte field.");
555        }
556        if (negative) {
557            val ^= max - 1;
558            val++;
559            val |= 0xffL << bits;
560        }
561        for (int i = offset + length - 1; i >= offset; i--) {
562            buf[i] = (byte) val;
563            val >>= 8;
564        }
565    }
566
567    private static void formatBigIntegerBinary(final long value, final byte[] buf,
568                                               final int offset,
569                                               final int length,
570                                               final boolean negative) {
571        final BigInteger val = BigInteger.valueOf(value);
572        final byte[] b = val.toByteArray();
573        final int len = b.length;
574        if (len > length - 1) {
575            throw new IllegalArgumentException("Value " + value +
576                " is too large for " + length + " byte field.");
577        }
578        final int off = offset + length - len;
579        System.arraycopy(b, 0, buf, off, len);
580        final byte fill = (byte) (negative ? 0xff : 0);
581        for (int i = offset + 1; i < off; i++) {
582            buf[i] = fill;
583        }
584    }
585
586    /**
587     * Writes an octal value into a buffer.
588     *
589     * Uses {@link #formatUnsignedOctalString} to format
590     * the value as an octal string with leading zeros.
591     * The converted number is followed by NUL and then space.
592     *
593     * @param value The value to convert
594     * @param buf The destination buffer
595     * @param offset The starting offset into the buffer.
596     * @param length The size of the buffer.
597     * @return The updated value of offset, i.e. offset+length
598     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
599     */
600    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
601
602        int idx=length-2; // for NUL and space
603        formatUnsignedOctalString(value, buf, offset, idx);
604
605        buf[offset + idx++]   = 0; // Trailing null
606        buf[offset + idx]     = (byte) ' '; // Trailing space
607
608        return offset + length;
609    }
610
611    /**
612     * Compute the checksum of a tar entry header.
613     *
614     * @param buf The tar entry's header buffer.
615     * @return The computed checksum.
616     */
617    public static long computeCheckSum(final byte[] buf) {
618        long sum = 0;
619
620        for (final byte element : buf) {
621            sum += BYTE_MASK & element;
622        }
623
624        return sum;
625    }
626
627    /**
628     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>:
629     * <blockquote>
630     * The checksum is calculated by taking the sum of the unsigned byte values
631     * of the header block with the eight checksum bytes taken to be ascii
632     * spaces (decimal value 32). It is stored as a six digit octal number with
633     * leading zeroes followed by a NUL and then a space. Various
634     * implementations do not adhere to this format. For better compatibility,
635     * ignore leading and trailing whitespace, and get the first six digits. In
636     * addition, some historic tar implementations treated bytes as signed.
637     * Implementations typically calculate the checksum both ways, and treat it
638     * as good if either the signed or unsigned sum matches the included
639     * checksum.
640     * </blockquote>
641     * <p>
642     * The return value of this method should be treated as a best-effort
643     * heuristic rather than an absolute and final truth. The checksum
644     * verification logic may well evolve over time as more special cases
645     * are encountered.
646     *
647     * @param header tar header
648     * @return whether the checksum is reasonably good
649     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
650     * @since 1.5
651     */
652    public static boolean verifyCheckSum(final byte[] header) {
653        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
654        long unsignedSum = 0;
655        long signedSum = 0;
656
657        for (int i = 0; i < header.length; i++) {
658            byte b = header[i];
659            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
660                b = ' ';
661            }
662            unsignedSum += 0xff & b;
663            signedSum += b;
664        }
665        return storedSum == unsignedSum || storedSum == signedSum;
666    }
667
668    /**
669     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
670     * may appear multi times, and they look like:
671     *
672     * GNU.sparse.size=size
673     * GNU.sparse.numblocks=numblocks
674     * repeat numblocks times
675     *   GNU.sparse.offset=offset
676     *   GNU.sparse.numbytes=numbytes
677     * end repeat
678     *
679     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
680     *
681     * GNU.sparse.map
682     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
683     *
684     * @param inputStream input stream to read keys and values
685     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
686     *                      the sparse headers need to be stored in an array, not a map
687     * @param globalPaxHeaders global PAX headers of the tar archive
688     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
689     * @throws IOException if an I/O error occurs.
690     * @deprecated use the four-arg version instead
691     */
692    @Deprecated
693    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders)
694            throws IOException {
695        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
696    }
697
698    /**
699     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
700     * may appear multi times, and they look like:
701     *
702     * GNU.sparse.size=size
703     * GNU.sparse.numblocks=numblocks
704     * repeat numblocks times
705     *   GNU.sparse.offset=offset
706     *   GNU.sparse.numbytes=numbytes
707     * end repeat
708     *
709     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
710     *
711     * GNU.sparse.map
712     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
713     *
714     * @param inputStream input stream to read keys and values
715     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
716     *                      the sparse headers need to be stored in an array, not a map
717     * @param globalPaxHeaders global PAX headers of the tar archive
718     * @param headerSize total size of the PAX header, will be ignored if negative
719     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
720     * @throws IOException if an I/O error occurs.
721     * @since 1.21
722     */
723    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream,
724            final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders,
725            final long headerSize) throws IOException {
726        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
727        Long offset = null;
728        // Format is "length keyword=value\n";
729        int totalRead = 0;
730        while(true) { // get length
731            int ch;
732            int len = 0;
733            int read = 0;
734            while((ch = inputStream.read()) != -1) {
735                read++;
736                totalRead++;
737                if (ch == '\n') { // blank line in header
738                    break;
739                }
740                if (ch == ' '){ // End of length string
741                    // Get keyword
742                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
743                    while((ch = inputStream.read()) != -1) {
744                        read++;
745                        totalRead++;
746                        if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) {
747                            break;
748                        }
749                        if (ch == '='){ // end of keyword
750                            final String keyword = coll.toString(CharsetNames.UTF_8);
751                            // Get rest of entry
752                            final int restLen = len - read;
753                            if (restLen <= 1) { // only NL
754                                headers.remove(keyword);
755                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
756                                throw new IOException("Paxheader value size " + restLen
757                                    + " exceeds size of header record");
758                            } else {
759                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
760                                final int got = rest.length;
761                                if (got != restLen) {
762                                    throw new IOException("Failed to read "
763                                            + "Paxheader. Expected "
764                                            + restLen
765                                            + " bytes, read "
766                                            + got);
767                                }
768                                totalRead += restLen;
769                                // Drop trailing NL
770                                if (rest[restLen - 1] != '\n') {
771                                    throw new IOException("Failed to read Paxheader."
772                                       + "Value should end with a newline");
773                                }
774                                final String value = new String(rest, 0, restLen - 1, UTF_8);
775                                headers.put(keyword, value);
776
777                                // for 0.0 PAX Headers
778                                if (keyword.equals("GNU.sparse.offset")) {
779                                    if (offset != null) {
780                                        // previous GNU.sparse.offset header but but no numBytes
781                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
782                                    }
783                                    try {
784                                        offset = Long.valueOf(value);
785                                    } catch (NumberFormatException ex) {
786                                        throw new IOException("Failed to read Paxheader."
787                                            + "GNU.sparse.offset contains a non-numeric value");
788                                    }
789                                    if (offset < 0) {
790                                        throw new IOException("Failed to read Paxheader."
791                                            + "GNU.sparse.offset contains negative value");
792                                    }
793                                }
794
795                                // for 0.0 PAX Headers
796                                if (keyword.equals("GNU.sparse.numbytes")) {
797                                    if (offset == null) {
798                                        throw new IOException("Failed to read Paxheader." +
799                                                "GNU.sparse.offset is expected before GNU.sparse.numbytes shows up.");
800                                    }
801                                    long numbytes;
802                                    try {
803                                        numbytes = Long.parseLong(value);
804                                    } catch (NumberFormatException ex) {
805                                        throw new IOException("Failed to read Paxheader."
806                                            + "GNU.sparse.numbytes contains a non-numeric value.");
807                                    }
808                                    if (numbytes < 0) {
809                                        throw new IOException("Failed to read Paxheader."
810                                            + "GNU.sparse.numbytes contains negative value");
811                                    }
812                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
813                                    offset = null;
814                                }
815                            }
816                            break;
817                        }
818                        coll.write((byte) ch);
819                    }
820                    break; // Processed single header
821                }
822
823                // COMPRESS-530 : throw if we encounter a non-number while reading length
824                if (ch < '0' || ch > '9') {
825                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
826                }
827
828                len *= 10;
829                len += ch - '0';
830            }
831            if (ch == -1){ // EOF
832                break;
833            }
834        }
835        if (offset != null) {
836            // offset but no numBytes
837            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
838        }
839        return headers;
840    }
841
842    /**
843     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
844     * GNU.sparse.map
845     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
846     *
847     * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You
848     * should use {@link #parseFromPAX01SparseHeaders} directly instead.
849     *
850     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
851     * @return sparse headers parsed from sparse map
852     * @deprecated use #parseFromPAX01SparseHeaders instead
853     */
854    @Deprecated
855    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(String sparseMap) {
856        try {
857            return parseFromPAX01SparseHeaders(sparseMap);
858        } catch (IOException ex) {
859            throw new UncheckedIOException(ex.getMessage(), ex);
860        }
861    }
862
863    /**
864     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
865     * GNU.sparse.map
866     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
867     *
868     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
869     * @return unmodifiable list of sparse headers parsed from sparse map
870     * @throws IOException Corrupted TAR archive.
871     * @since 1.21
872     */
873    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(String sparseMap)
874        throws IOException {
875        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
876        String[] sparseHeaderStrings = sparseMap.split(",");
877        if (sparseHeaderStrings.length % 2 == 1) {
878            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
879        }
880
881        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
882            long sparseOffset;
883            try {
884                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
885            } catch (NumberFormatException ex) {
886                throw new IOException("Corrupted TAR archive."
887                    + " Sparse struct offset contains a non-numeric value");
888            }
889            if (sparseOffset < 0) {
890                throw new IOException("Corrupted TAR archive."
891                    + " Sparse struct offset contains negative value");
892            }
893            long sparseNumbytes;
894            try {
895                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
896            } catch (NumberFormatException ex) {
897                throw new IOException("Corrupted TAR archive."
898                    + " Sparse struct numbytes contains a non-numeric value");
899            }
900            if (sparseNumbytes < 0) {
901                throw new IOException("Corrupted TAR archive."
902                    + " Sparse struct numbytes contains negative value");
903            }
904            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
905        }
906
907        return Collections.unmodifiableList(sparseHeaders);
908    }
909
910    /**
911     * For PAX Format 1.X:
912     * The sparse map itself is stored in the file data block, preceding the actual file data.
913     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
914     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
915     * giving the offset and size of the data block it describes.
916     * @param inputStream parsing source.
917     * @param recordSize The size the TAR header
918     * @return sparse headers
919     * @throws IOException if an I/O error occurs.
920     */
921    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
922        // for 1.X PAX Headers
923        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
924        long bytesRead = 0;
925
926        long[] readResult = readLineOfNumberForPax1X(inputStream);
927        long sparseHeadersCount = readResult[0];
928        if (sparseHeadersCount < 0) {
929            // overflow while reading number?
930            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
931        }
932        bytesRead += readResult[1];
933        while (sparseHeadersCount-- > 0) {
934            readResult = readLineOfNumberForPax1X(inputStream);
935            final long sparseOffset = readResult[0];
936            if (sparseOffset < 0) {
937                throw new IOException("Corrupted TAR archive."
938                    + " Sparse header block offset contains negative value");
939            }
940            bytesRead += readResult[1];
941
942            readResult = readLineOfNumberForPax1X(inputStream);
943            final long sparseNumbytes = readResult[0];
944            if (sparseNumbytes < 0) {
945                throw new IOException("Corrupted TAR archive."
946                    + " Sparse header block numbytes contains negative value");
947            }
948            bytesRead += readResult[1];
949            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
950        }
951
952        // skip the rest of this record data
953        long bytesToSkip = recordSize - bytesRead % recordSize;
954        IOUtils.skip(inputStream, bytesToSkip);
955        return sparseHeaders;
956    }
957
958    /**
959     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data.
960     * It consists of a series of decimal numbers delimited by newlines.
961     *
962     * @param inputStream the input stream of the tar file
963     * @return the decimal number delimited by '\n', and the bytes read from input stream
964     * @throws IOException
965     */
966    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
967        int number;
968        long result = 0;
969        long bytesRead = 0;
970
971        while ((number = inputStream.read()) != '\n') {
972            bytesRead += 1;
973            if (number == -1) {
974                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
975            }
976            if (number < '0' || number > '9') {
977                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
978            }
979            result = result * 10 + (number - '0');
980        }
981        bytesRead += 1;
982
983        return new long[]{result, bytesRead};
984    }
985
986}