/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.tar;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN;
import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.math.BigInteger;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.IOUtils;

046/** 047 * This class provides static utility methods to work with byte streams. 048 * 049 * @Immutable 050 */ 051// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 052public class TarUtils { 053 054 private static final int BYTE_MASK = 255; 055 056 static final ZipEncoding DEFAULT_ENCODING = 057 ZipEncodingHelper.getZipEncoding(null); 058 059 /** 060 * Encapsulates the algorithms used up to Commons Compress 1.3 as 061 * ZipEncoding. 062 */ 063 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 064 @Override 065 public boolean canEncode(final String name) { return true; } 066 067 @Override 068 public ByteBuffer encode(final String name) { 069 final int length = name.length(); 070 final byte[] buf = new byte[length]; 071 072 // copy until end of input or output is reached. 073 for (int i = 0; i < length; ++i) { 074 buf[i] = (byte) name.charAt(i); 075 } 076 return ByteBuffer.wrap(buf); 077 } 078 079 @Override 080 public String decode(final byte[] buffer) { 081 final int length = buffer.length; 082 final StringBuilder result = new StringBuilder(length); 083 084 for (final byte b : buffer) { 085 if (b == 0) { // Trailing null 086 break; 087 } 088 result.append((char) (b & 0xFF)); // Allow for sign-extension 089 } 090 091 return result.toString(); 092 } 093 }; 094 095 /** Private constructor to prevent instantiation of this utility class. */ 096 private TarUtils(){ 097 } 098 099 /** 100 * Parse an octal string from a buffer. 101 * 102 * <p>Leading spaces are ignored. 
103 * The buffer must contain a trailing space or NUL, 104 * and may contain an additional trailing space or NUL.</p> 105 * 106 * <p>The input buffer is allowed to contain all NULs, 107 * in which case the method returns 0L 108 * (this allows for missing fields).</p> 109 * 110 * <p>To work-around some tar implementations that insert a 111 * leading NUL this method returns 0 if it detects a leading NUL 112 * since Commons Compress 1.4.</p> 113 * 114 * @param buffer The buffer from which to parse. 115 * @param offset The offset into the buffer from which to parse. 116 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 117 * @return The long value of the octal string. 118 * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. 119 */ 120 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 121 long result = 0; 122 int end = offset + length; 123 int start = offset; 124 125 if (length < 2) { 126 throw new IllegalArgumentException("Length " + length + " must be at least 2"); 127 } 128 129 if (buffer[start] == 0) { 130 return 0L; 131 } 132 133 // Skip leading spaces 134 while (start < end) { 135 if (buffer[start] != ' ') { 136 break; 137 } 138 start++; 139 } 140 141 // Trim all trailing NULs and spaces. 142 // The ustar and POSIX tar specs require a trailing NUL or 143 // space but some implementations use the extra digit for big 144 // sizes/uids/gids ... 
145 byte trailer = buffer[end - 1]; 146 while (start < end && (trailer == 0 || trailer == ' ')) { 147 end--; 148 trailer = buffer[end - 1]; 149 } 150 151 for (; start < end; start++) { 152 final byte currentByte = buffer[start]; 153 // CheckStyle:MagicNumber OFF 154 if (currentByte < '0' || currentByte > '7') { 155 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); 156 } 157 result = (result << 3) + (currentByte - '0'); // convert from ASCII 158 // CheckStyle:MagicNumber ON 159 } 160 161 return result; 162 } 163 164 /** 165 * Compute the value contained in a byte buffer. If the most 166 * significant bit of the first byte in the buffer is set, this 167 * bit is ignored and the rest of the buffer is interpreted as a 168 * binary number. Otherwise, the buffer is interpreted as an 169 * octal number as per the parseOctal function above. 170 * 171 * @param buffer The buffer from which to parse. 172 * @param offset The offset into the buffer from which to parse. 173 * @param length The maximum number of bytes to parse. 174 * @return The long value of the octal or binary string. 175 * @throws IllegalArgumentException if the trailing space/NUL is 176 * missing or an invalid byte is detected in an octal number, or 177 * if a binary number would exceed the size of a signed long 178 * 64-bit integer. 
179 * @since 1.4 180 */ 181 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 182 final int length) { 183 184 if ((buffer[offset] & 0x80) == 0) { 185 return parseOctal(buffer, offset, length); 186 } 187 final boolean negative = buffer[offset] == (byte) 0xff; 188 if (length < 9) { 189 return parseBinaryLong(buffer, offset, length, negative); 190 } 191 return parseBinaryBigInteger(buffer, offset, length, negative); 192 } 193 194 private static long parseBinaryLong(final byte[] buffer, final int offset, 195 final int length, 196 final boolean negative) { 197 if (length >= 9) { 198 throw new IllegalArgumentException("At offset " + offset + ", " 199 + length + " byte binary number" 200 + " exceeds maximum signed long" 201 + " value"); 202 } 203 long val = 0; 204 for (int i = 1; i < length; i++) { 205 val = (val << 8) + (buffer[offset + i] & 0xff); 206 } 207 if (negative) { 208 // 2's complement 209 val--; 210 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 211 } 212 return negative ? -val : val; 213 } 214 215 private static long parseBinaryBigInteger(final byte[] buffer, 216 final int offset, 217 final int length, 218 final boolean negative) { 219 final byte[] remainder = new byte[length - 1]; 220 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 221 BigInteger val = new BigInteger(remainder); 222 if (negative) { 223 // 2's complement 224 val = val.add(BigInteger.valueOf(-1)).not(); 225 } 226 if (val.bitLength() > 63) { 227 throw new IllegalArgumentException("At offset " + offset + ", " 228 + length + " byte binary number" 229 + " exceeds maximum signed long" 230 + " value"); 231 } 232 return negative ? -val.longValue() : val.longValue(); 233 } 234 235 /** 236 * Parse a boolean byte from a buffer. 237 * Leading spaces and NUL are ignored. 238 * The buffer may contain trailing spaces or NULs. 239 * 240 * @param buffer The buffer from which to parse. 241 * @param offset The offset into the buffer from which to parse. 
242 * @return The boolean value of the bytes. 243 * @throws IllegalArgumentException if an invalid byte is detected. 244 */ 245 public static boolean parseBoolean(final byte[] buffer, final int offset) { 246 return buffer[offset] == 1; 247 } 248 249 // Helper method to generate the exception message 250 private static String exceptionMessage(final byte[] buffer, final int offset, 251 final int length, final int current, final byte currentByte) { 252 // default charset is good enough for an exception message, 253 // 254 // the alternative was to modify parseOctal and 255 // parseOctalOrBinary to receive the ZipEncoding of the 256 // archive (deprecating the existing public methods, of 257 // course) and dealing with the fact that ZipEncoding#decode 258 // can throw an IOException which parseOctal* doesn't declare 259 String string = new String(buffer, offset, length, Charset.defaultCharset()); 260 261 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 262 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; 263 } 264 265 /** 266 * Parse an entry name from a buffer. 267 * Parsing stops when a NUL is found 268 * or the buffer length is reached. 269 * 270 * @param buffer The buffer from which to parse. 271 * @param offset The offset into the buffer from which to parse. 272 * @param length The maximum number of bytes to parse. 273 * @return The entry name. 274 */ 275 public static String parseName(final byte[] buffer, final int offset, final int length) { 276 try { 277 return parseName(buffer, offset, length, DEFAULT_ENCODING); 278 } catch (final IOException ex) { // NOSONAR 279 try { 280 return parseName(buffer, offset, length, FALLBACK_ENCODING); 281 } catch (final IOException ex2) { 282 // impossible 283 throw new UncheckedIOException(ex2); //NOSONAR 284 } 285 } 286 } 287 288 /** 289 * Parse an entry name from a buffer. 
290 * Parsing stops when a NUL is found 291 * or the buffer length is reached. 292 * 293 * @param buffer The buffer from which to parse. 294 * @param offset The offset into the buffer from which to parse. 295 * @param length The maximum number of bytes to parse. 296 * @param encoding name of the encoding to use for file names 297 * @since 1.4 298 * @return The entry name. 299 * @throws IOException on error 300 */ 301 public static String parseName(final byte[] buffer, final int offset, 302 final int length, 303 final ZipEncoding encoding) 304 throws IOException { 305 306 int len = 0; 307 for (int i = offset; len < length && buffer[i] != 0; i++) { 308 len++; 309 } 310 if (len > 0) { 311 final byte[] b = new byte[len]; 312 System.arraycopy(buffer, offset, b, 0, len); 313 return encoding.decode(b); 314 } 315 return ""; 316 } 317 318 /** 319 * Parses the content of a PAX 1.0 sparse block. 320 * @since 1.20 321 * @param buffer The buffer from which to parse. 322 * @param offset The offset into the buffer from which to parse. 
323 * @return a parsed sparse struct 324 */ 325 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 326 final long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN); 327 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN); 328 329 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 330 } 331 332 /** 333 * @since 1.21 334 */ 335 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) 336 throws IOException { 337 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 338 for (int i = 0; i < entries; i++) { 339 try { 340 final TarArchiveStructSparse sparseHeader = 341 parseSparse(buffer, offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN)); 342 343 if (sparseHeader.getOffset() < 0) { 344 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 345 } 346 if (sparseHeader.getNumbytes() < 0) { 347 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 348 } 349 sparseHeaders.add(sparseHeader); 350 } catch (IllegalArgumentException ex) { 351 // thrown internally by parseOctalOrBinary 352 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 353 } 354 } 355 return Collections.unmodifiableList(sparseHeaders); 356 } 357 358 /** 359 * Copy a name into a buffer. 360 * Copies characters from the name into the buffer 361 * starting at the specified offset. 362 * If the buffer is longer than the name, the buffer 363 * is filled with trailing NULs. 364 * If the name is longer than the buffer, 365 * the output is truncated. 366 * 367 * @param name The header name from which to copy the characters. 368 * @param buf The buffer where the name is to be stored. 369 * @param offset The starting offset into the buffer 370 * @param length The maximum number of header bytes to copy. 371 * @return The updated offset, i.e. 
offset + length 372 */ 373 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 374 try { 375 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 376 } catch (final IOException ex) { // NOSONAR 377 try { 378 return formatNameBytes(name, buf, offset, length, 379 FALLBACK_ENCODING); 380 } catch (final IOException ex2) { 381 // impossible 382 throw new UncheckedIOException(ex2); //NOSONAR 383 } 384 } 385 } 386 387 /** 388 * Copy a name into a buffer. 389 * Copies characters from the name into the buffer 390 * starting at the specified offset. 391 * If the buffer is longer than the name, the buffer 392 * is filled with trailing NULs. 393 * If the name is longer than the buffer, 394 * the output is truncated. 395 * 396 * @param name The header name from which to copy the characters. 397 * @param buf The buffer where the name is to be stored. 398 * @param offset The starting offset into the buffer 399 * @param length The maximum number of header bytes to copy. 400 * @param encoding name of the encoding to use for file names 401 * @since 1.4 402 * @return The updated offset, i.e. offset + length 403 * @throws IOException on error 404 */ 405 public static int formatNameBytes(final String name, final byte[] buf, final int offset, 406 final int length, 407 final ZipEncoding encoding) 408 throws IOException { 409 int len = name.length(); 410 ByteBuffer b = encoding.encode(name); 411 while (((Buffer)b).limit() > length && len > 0) { 412 b = encoding.encode(name.substring(0, --len)); 413 } 414 final int limit = ((Buffer)b).limit() - ((Buffer)b).position(); 415 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 416 417 // Pad any remaining output bytes with NUL 418 for (int i = limit; i < length; ++i) { 419 buf[offset + i] = 0; 420 } 421 422 return offset + length; 423 } 424 425 /** 426 * Fill buffer with unsigned octal number, padded with leading zeroes. 
427 * 428 * @param value number to convert to octal - treated as unsigned 429 * @param buffer destination buffer 430 * @param offset starting offset in buffer 431 * @param length length of buffer to fill 432 * @throws IllegalArgumentException if the value will not fit in the buffer 433 */ 434 public static void formatUnsignedOctalString(final long value, final byte[] buffer, 435 final int offset, final int length) { 436 int remaining = length; 437 remaining--; 438 if (value == 0) { 439 buffer[offset + remaining--] = (byte) '0'; 440 } else { 441 long val = value; 442 for (; remaining >= 0 && val != 0; --remaining) { 443 // CheckStyle:MagicNumber OFF 444 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 445 val = val >>> 3; 446 // CheckStyle:MagicNumber ON 447 } 448 if (val != 0){ 449 throw new IllegalArgumentException 450 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 451 } 452 } 453 454 for (; remaining >= 0; --remaining) { // leading zeros 455 buffer[offset + remaining] = (byte) '0'; 456 } 457 } 458 459 /** 460 * Write an octal integer into a buffer. 461 * 462 * Uses {@link #formatUnsignedOctalString} to format 463 * the value as an octal string with leading zeros. 
464 * The converted number is followed by space and NUL 465 * 466 * @param value The value to write 467 * @param buf The buffer to receive the output 468 * @param offset The starting offset into the buffer 469 * @param length The size of the output buffer 470 * @return The updated offset, i.e offset+length 471 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 472 */ 473 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 474 475 int idx=length-2; // For space and trailing null 476 formatUnsignedOctalString(value, buf, offset, idx); 477 478 buf[offset + idx++] = (byte) ' '; // Trailing space 479 buf[offset + idx] = 0; // Trailing null 480 481 return offset + length; 482 } 483 484 /** 485 * Write an octal long integer into a buffer. 486 * 487 * Uses {@link #formatUnsignedOctalString} to format 488 * the value as an octal string with leading zeros. 489 * The converted number is followed by a space. 490 * 491 * @param value The value to write as octal 492 * @param buf The destinationbuffer. 493 * @param offset The starting offset into the buffer. 494 * @param length The length of the buffer 495 * @return The updated offset 496 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 497 */ 498 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 499 500 final int idx=length-1; // For space 501 502 formatUnsignedOctalString(value, buf, offset, idx); 503 buf[offset + idx] = (byte) ' '; // Trailing space 504 505 return offset + length; 506 } 507 508 /** 509 * Write an long integer into a buffer as an octal string if this 510 * will fit, or as a binary number otherwise. 511 * 512 * Uses {@link #formatUnsignedOctalString} to format 513 * the value as an octal string with leading zeros. 514 * The converted number is followed by a space. 
515 * 516 * @param value The value to write into the buffer. 517 * @param buf The destination buffer. 518 * @param offset The starting offset into the buffer. 519 * @param length The length of the buffer. 520 * @return The updated offset. 521 * @throws IllegalArgumentException if the value (and trailer) 522 * will not fit in the buffer. 523 * @since 1.4 524 */ 525 public static int formatLongOctalOrBinaryBytes( 526 final long value, final byte[] buf, final int offset, final int length) { 527 528 // Check whether we are dealing with UID/GID or SIZE field 529 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 530 531 final boolean negative = value < 0; 532 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 533 return formatLongOctalBytes(value, buf, offset, length); 534 } 535 536 if (length < 9) { 537 formatLongBinary(value, buf, offset, length, negative); 538 } else { 539 formatBigIntegerBinary(value, buf, offset, length, negative); 540 } 541 542 buf[offset] = (byte) (negative ? 
0xff : 0x80); 543 return offset + length; 544 } 545 546 private static void formatLongBinary(final long value, final byte[] buf, 547 final int offset, final int length, 548 final boolean negative) { 549 final int bits = (length - 1) * 8; 550 final long max = 1L << bits; 551 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 552 if (val < 0 || val >= max) { 553 throw new IllegalArgumentException("Value " + value + 554 " is too large for " + length + " byte field."); 555 } 556 if (negative) { 557 val ^= max - 1; 558 val++; 559 val |= 0xffL << bits; 560 } 561 for (int i = offset + length - 1; i >= offset; i--) { 562 buf[i] = (byte) val; 563 val >>= 8; 564 } 565 } 566 567 private static void formatBigIntegerBinary(final long value, final byte[] buf, 568 final int offset, 569 final int length, 570 final boolean negative) { 571 final BigInteger val = BigInteger.valueOf(value); 572 final byte[] b = val.toByteArray(); 573 final int len = b.length; 574 if (len > length - 1) { 575 throw new IllegalArgumentException("Value " + value + 576 " is too large for " + length + " byte field."); 577 } 578 final int off = offset + length - len; 579 System.arraycopy(b, 0, buf, off, len); 580 final byte fill = (byte) (negative ? 0xff : 0); 581 for (int i = offset + 1; i < off; i++) { 582 buf[i] = fill; 583 } 584 } 585 586 /** 587 * Writes an octal value into a buffer. 588 * 589 * Uses {@link #formatUnsignedOctalString} to format 590 * the value as an octal string with leading zeros. 591 * The converted number is followed by NUL and then space. 592 * 593 * @param value The value to convert 594 * @param buf The destination buffer 595 * @param offset The starting offset into the buffer. 596 * @param length The size of the buffer. 597 * @return The updated value of offset, i.e. 
offset+length 598 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 599 */ 600 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 601 602 int idx=length-2; // for NUL and space 603 formatUnsignedOctalString(value, buf, offset, idx); 604 605 buf[offset + idx++] = 0; // Trailing null 606 buf[offset + idx] = (byte) ' '; // Trailing space 607 608 return offset + length; 609 } 610 611 /** 612 * Compute the checksum of a tar entry header. 613 * 614 * @param buf The tar entry's header buffer. 615 * @return The computed checksum. 616 */ 617 public static long computeCheckSum(final byte[] buf) { 618 long sum = 0; 619 620 for (final byte element : buf) { 621 sum += BYTE_MASK & element; 622 } 623 624 return sum; 625 } 626 627 /** 628 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: 629 * <blockquote> 630 * The checksum is calculated by taking the sum of the unsigned byte values 631 * of the header block with the eight checksum bytes taken to be ascii 632 * spaces (decimal value 32). It is stored as a six digit octal number with 633 * leading zeroes followed by a NUL and then a space. Various 634 * implementations do not adhere to this format. For better compatibility, 635 * ignore leading and trailing whitespace, and get the first six digits. In 636 * addition, some historic tar implementations treated bytes as signed. 637 * Implementations typically calculate the checksum both ways, and treat it 638 * as good if either the signed or unsigned sum matches the included 639 * checksum. 640 * </blockquote> 641 * <p> 642 * The return value of this method should be treated as a best-effort 643 * heuristic rather than an absolute and final truth. The checksum 644 * verification logic may well evolve over time as more special cases 645 * are encountered. 
646 * 647 * @param header tar header 648 * @return whether the checksum is reasonably good 649 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 650 * @since 1.5 651 */ 652 public static boolean verifyCheckSum(final byte[] header) { 653 final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN); 654 long unsignedSum = 0; 655 long signedSum = 0; 656 657 for (int i = 0; i < header.length; i++) { 658 byte b = header[i]; 659 if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { 660 b = ' '; 661 } 662 unsignedSum += 0xff & b; 663 signedSum += b; 664 } 665 return storedSum == unsignedSum || storedSum == signedSum; 666 } 667 668 /** 669 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 670 * may appear multi times, and they look like: 671 * 672 * GNU.sparse.size=size 673 * GNU.sparse.numblocks=numblocks 674 * repeat numblocks times 675 * GNU.sparse.offset=offset 676 * GNU.sparse.numbytes=numbytes 677 * end repeat 678 * 679 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 680 * 681 * GNU.sparse.map 682 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 683 * 684 * @param inputStream input stream to read keys and values 685 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 686 * the sparse headers need to be stored in an array, not a map 687 * @param globalPaxHeaders global PAX headers of the tar archive 688 * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry. 689 * @throws IOException if an I/O error occurs. 
690 * @deprecated use the four-arg version instead 691 */ 692 @Deprecated 693 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders) 694 throws IOException { 695 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 696 } 697 698 /** 699 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) 700 * may appear multi times, and they look like: 701 * 702 * GNU.sparse.size=size 703 * GNU.sparse.numblocks=numblocks 704 * repeat numblocks times 705 * GNU.sparse.offset=offset 706 * GNU.sparse.numbytes=numbytes 707 * end repeat 708 * 709 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 710 * 711 * GNU.sparse.map 712 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 713 * 714 * @param inputStream input stream to read keys and values 715 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, 716 * the sparse headers need to be stored in an array, not a map 717 * @param globalPaxHeaders global PAX headers of the tar archive 718 * @param headerSize total size of the PAX header, will be ignored if negative 719 * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry. 720 * @throws IOException if an I/O error occurs. 
721 * @since 1.21 722 */ 723 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, 724 final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders, 725 final long headerSize) throws IOException { 726 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 727 Long offset = null; 728 // Format is "length keyword=value\n"; 729 int totalRead = 0; 730 while(true) { // get length 731 int ch; 732 int len = 0; 733 int read = 0; 734 while((ch = inputStream.read()) != -1) { 735 read++; 736 totalRead++; 737 if (ch == '\n') { // blank line in header 738 break; 739 } 740 if (ch == ' '){ // End of length string 741 // Get keyword 742 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 743 while((ch = inputStream.read()) != -1) { 744 read++; 745 totalRead++; 746 if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) { 747 break; 748 } 749 if (ch == '='){ // end of keyword 750 final String keyword = coll.toString(CharsetNames.UTF_8); 751 // Get rest of entry 752 final int restLen = len - read; 753 if (restLen <= 1) { // only NL 754 headers.remove(keyword); 755 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 756 throw new IOException("Paxheader value size " + restLen 757 + " exceeds size of header record"); 758 } else { 759 final byte[] rest = IOUtils.readRange(inputStream, restLen); 760 final int got = rest.length; 761 if (got != restLen) { 762 throw new IOException("Failed to read " 763 + "Paxheader. Expected " 764 + restLen 765 + " bytes, read " 766 + got); 767 } 768 totalRead += restLen; 769 // Drop trailing NL 770 if (rest[restLen - 1] != '\n') { 771 throw new IOException("Failed to read Paxheader." 
772 + "Value should end with a newline"); 773 } 774 final String value = new String(rest, 0, restLen - 1, UTF_8); 775 headers.put(keyword, value); 776 777 // for 0.0 PAX Headers 778 if (keyword.equals("GNU.sparse.offset")) { 779 if (offset != null) { 780 // previous GNU.sparse.offset header but but no numBytes 781 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 782 } 783 try { 784 offset = Long.valueOf(value); 785 } catch (NumberFormatException ex) { 786 throw new IOException("Failed to read Paxheader." 787 + "GNU.sparse.offset contains a non-numeric value"); 788 } 789 if (offset < 0) { 790 throw new IOException("Failed to read Paxheader." 791 + "GNU.sparse.offset contains negative value"); 792 } 793 } 794 795 // for 0.0 PAX Headers 796 if (keyword.equals("GNU.sparse.numbytes")) { 797 if (offset == null) { 798 throw new IOException("Failed to read Paxheader." + 799 "GNU.sparse.offset is expected before GNU.sparse.numbytes shows up."); 800 } 801 long numbytes; 802 try { 803 numbytes = Long.parseLong(value); 804 } catch (NumberFormatException ex) { 805 throw new IOException("Failed to read Paxheader." 806 + "GNU.sparse.numbytes contains a non-numeric value."); 807 } 808 if (numbytes < 0) { 809 throw new IOException("Failed to read Paxheader." 810 + "GNU.sparse.numbytes contains negative value"); 811 } 812 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 813 offset = null; 814 } 815 } 816 break; 817 } 818 coll.write((byte) ch); 819 } 820 break; // Processed single header 821 } 822 823 // COMPRESS-530 : throw if we encounter a non-number while reading length 824 if (ch < '0' || ch > '9') { 825 throw new IOException("Failed to read Paxheader. 
Encountered a non-number while reading length"); 826 } 827 828 len *= 10; 829 len += ch - '0'; 830 } 831 if (ch == -1){ // EOF 832 break; 833 } 834 } 835 if (offset != null) { 836 // offset but no numBytes 837 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 838 } 839 return headers; 840 } 841 842 /** 843 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 844 * GNU.sparse.map 845 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 846 * 847 * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You 848 * should use {@link #parseFromPAX01SparseHeaders} directly instead. 849 * 850 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 851 * @return sparse headers parsed from sparse map 852 * @deprecated use #parseFromPAX01SparseHeaders instead 853 */ 854 @Deprecated 855 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(String sparseMap) { 856 try { 857 return parseFromPAX01SparseHeaders(sparseMap); 858 } catch (IOException ex) { 859 throw new UncheckedIOException(ex.getMessage(), ex); 860 } 861 } 862 863 /** 864 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 865 * GNU.sparse.map 866 * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 867 * 868 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 869 * @return unmodifiable list of sparse headers parsed from sparse map 870 * @throws IOException Corrupted TAR archive. 
871 * @since 1.21 872 */ 873 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(String sparseMap) 874 throws IOException { 875 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 876 String[] sparseHeaderStrings = sparseMap.split(","); 877 if (sparseHeaderStrings.length % 2 == 1) { 878 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 879 } 880 881 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 882 long sparseOffset; 883 try { 884 sparseOffset = Long.parseLong(sparseHeaderStrings[i]); 885 } catch (NumberFormatException ex) { 886 throw new IOException("Corrupted TAR archive." 887 + " Sparse struct offset contains a non-numeric value"); 888 } 889 if (sparseOffset < 0) { 890 throw new IOException("Corrupted TAR archive." 891 + " Sparse struct offset contains negative value"); 892 } 893 long sparseNumbytes; 894 try { 895 sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]); 896 } catch (NumberFormatException ex) { 897 throw new IOException("Corrupted TAR archive." 898 + " Sparse struct numbytes contains a non-numeric value"); 899 } 900 if (sparseNumbytes < 0) { 901 throw new IOException("Corrupted TAR archive." 902 + " Sparse struct numbytes contains negative value"); 903 } 904 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 905 } 906 907 return Collections.unmodifiableList(sparseHeaders); 908 } 909 910 /** 911 * For PAX Format 1.X: 912 * The sparse map itself is stored in the file data block, preceding the actual file data. 913 * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. 914 * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers 915 * giving the offset and size of the data block it describes. 916 * @param inputStream parsing source. 
917 * @param recordSize The size the TAR header 918 * @return sparse headers 919 * @throws IOException if an I/O error occurs. 920 */ 921 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 922 // for 1.X PAX Headers 923 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 924 long bytesRead = 0; 925 926 long[] readResult = readLineOfNumberForPax1X(inputStream); 927 long sparseHeadersCount = readResult[0]; 928 if (sparseHeadersCount < 0) { 929 // overflow while reading number? 930 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 931 } 932 bytesRead += readResult[1]; 933 while (sparseHeadersCount-- > 0) { 934 readResult = readLineOfNumberForPax1X(inputStream); 935 final long sparseOffset = readResult[0]; 936 if (sparseOffset < 0) { 937 throw new IOException("Corrupted TAR archive." 938 + " Sparse header block offset contains negative value"); 939 } 940 bytesRead += readResult[1]; 941 942 readResult = readLineOfNumberForPax1X(inputStream); 943 final long sparseNumbytes = readResult[0]; 944 if (sparseNumbytes < 0) { 945 throw new IOException("Corrupted TAR archive." 946 + " Sparse header block numbytes contains negative value"); 947 } 948 bytesRead += readResult[1]; 949 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 950 } 951 952 // skip the rest of this record data 953 long bytesToSkip = recordSize - bytesRead % recordSize; 954 IOUtils.skip(inputStream, bytesToSkip); 955 return sparseHeaders; 956 } 957 958 /** 959 * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. 960 * It consists of a series of decimal numbers delimited by newlines. 
961 * 962 * @param inputStream the input stream of the tar file 963 * @return the decimal number delimited by '\n', and the bytes read from input stream 964 * @throws IOException 965 */ 966 private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { 967 int number; 968 long result = 0; 969 long bytesRead = 0; 970 971 while ((number = inputStream.read()) != '\n') { 972 bytesRead += 1; 973 if (number == -1) { 974 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 975 } 976 if (number < '0' || number > '9') { 977 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 978 } 979 result = result * 10 + (number - '0'); 980 } 981 bytesRead += 1; 982 983 return new long[]{result, bytesRead}; 984 } 985 986}