-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | tar-internal
--   
--   This library is for working with "<tt>.tar</tt>" archive files. It can
--   read and write a range of common variations of archive format
--   including V7, POSIX USTAR and GNU formats.
--   
--   It provides support for packing and unpacking portable archives. This
--   makes it suitable for distribution but not backup because details like
--   file ownership and exact permissions are not preserved.
--   
--   It also provides features for random access to archive content using
--   an index.
@package tar:tar-internal
@version 0.6.4.0


-- | Random access to the content of a <tt>.tar</tt> archive.
--   
--   This module uses common names and so is designed to be imported
--   qualified:
--   
--   <pre>
--   import qualified Codec.Archive.Tar.Index as TarIndex
--   </pre>
module Codec.Archive.Tar.Index

-- | An index of the entries in a tar file.
--   
--   This index type is designed to be quite compact and suitable to store
--   either on disk or in memory.
data TarIndex

-- | Look up a given filepath in the <a>TarIndex</a>. It may return a
--   <a>TarFileEntry</a> containing the <a>TarEntryOffset</a> of the file
--   within the tar file, or if the filepath identifies a directory then it
--   returns a <a>TarDir</a> containing the list of files within that
--   directory.
--   
--   Given the <a>TarEntryOffset</a> you can then use one of the I/O
--   operations:
--   
--   <ul>
--   <li><a>hReadEntry</a> to read the whole entry;</li>
--   <li><a>hReadEntryHeader</a> to read just the file metadata (e.g. its
--   length);</li>
--   </ul>
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry

-- | The result of <a>lookup</a> in a <a>TarIndex</a>. It can either be a
--   file directly, or a directory entry containing further entries (and
--   all subdirectories recursively). Note that the subtrees are
--   constructed lazily, so it's cheaper if you don't look at them.
data TarIndexEntry
TarFileEntry :: {-# UNPACK #-} !TarEntryOffset -> TarIndexEntry
TarDir :: [(FilePath, TarIndexEntry)] -> TarIndexEntry

-- | All the files in the index with their corresponding
--   <a>TarEntryOffset</a>s.
--   
--   Note that the files are in no special order. If you intend to read all
--   or most files then it is recommended to sort by the
--   <a>TarEntryOffset</a>.
toList :: TarIndex -> [(FilePath, TarEntryOffset)]

-- | An offset within a tar file. Use <a>hReadEntry</a>,
--   <a>hReadEntryHeader</a> or <a>hSeekEntryOffset</a>.
--   
--   This is actually a tar "record" number, not a byte offset.
type TarEntryOffset = Word32

-- | Reads an entire <a>Entry</a> at the given <a>TarEntryOffset</a> in the
--   tar file. The <a>Handle</a> must be open for reading and be seekable.
--   
--   This reads the whole entry into memory strictly, not incrementally.
--   For more control, use <a>hReadEntryHeader</a> and then read the entry
--   content manually.
hReadEntry :: Handle -> TarEntryOffset -> IO Entry

-- | Read the header for an <a>Entry</a> at the given <a>TarEntryOffset</a>
--   in the tar file. The <a>entryContent</a> will contain the correct
--   metadata but an empty file content. The <a>Handle</a> must be open for
--   reading and be seekable.
--   
--   The <a>Handle</a> position is advanced to the beginning of the entry
--   content (if any). You must check the <a>entryContent</a> to see if the
--   entry is of type <a>NormalFile</a>. If it is, the <a>NormalFile</a>
--   gives the content length and you are free to read this much data from
--   the <a>Handle</a>.
--   
--   <pre>
--   entry &lt;- Tar.hReadEntryHeader hnd offset
--   case Tar.entryContent entry of
--     Tar.NormalFile _ size -&gt; do content &lt;- BS.hGet hnd size
--                                 ...
--   </pre>
--   
--   Of course you don't have to read it all in one go (as
--   <a>hReadEntry</a> does), you can use any appropriate method to read it
--   incrementally.
--   
--   In addition to I/O errors, this can throw a <a>FormatError</a> if the
--   offset is wrong, or if the file is not valid tar format.
--   
--   There is also the lower level operation <a>hSeekEntryOffset</a>.
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry

-- | Build a <a>TarIndex</a> from a sequence of tar <a>Entries</a>. The
--   <a>Entries</a> are assumed to start at offset <tt>0</tt> within a
--   file.
build :: Entries e -> Either e TarIndex

-- | The intermediate type used for incremental construction of a
--   <a>TarIndex</a>.
data IndexBuilder

-- | The initial empty <a>IndexBuilder</a>.
empty :: IndexBuilder

-- | Add the next <a>Entry</a> into the <a>IndexBuilder</a>.
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder

-- | Use this function if you want to skip some entries and not add them to
--   the final <a>TarIndex</a>.
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder

-- | Finish accumulating <a>Entry</a> information and build the compact
--   <a>TarIndex</a> lookup structure.
finalise :: IndexBuilder -> TarIndex

-- | Resume building an existing index
--   
--   A <a>TarIndex</a> is optimized for a highly compact and efficient
--   in-memory representation. This, however, makes it read-only. If you
--   have an existing <a>TarIndex</a> for a large file, and want to add to
--   it, you can translate the <a>TarIndex</a> back to an
--   <a>IndexBuilder</a>. Be aware that this is a relatively costly
--   operation (linear in the size of the <a>TarIndex</a>), though still
--   faster than starting again from scratch.
--   
--   This is the left inverse to <a>finalise</a> (modulo ordering).
unfinalise :: TarIndex -> IndexBuilder

-- | The <a>TarIndex</a> is compact in memory, and it has a similarly
--   compact external representation.
serialise :: TarIndex -> ByteString

-- | Read the external representation back into a <a>TarIndex</a>.
deserialise :: ByteString -> Maybe (TarIndex, ByteString)

-- | This is a low level variant on <a>hReadEntryHeader</a>, that can be
--   used to iterate through a tar file, entry by entry.
--   
--   It has a few differences compared to <a>hReadEntryHeader</a>:
--   
--   <ul>
--   <li>It returns an indication when the end of the tar file is
--   reached.</li>
--   <li>It <i>does not</i> move the <a>Handle</a> position to the
--   beginning of the entry content.</li>
--   <li>It returns the <a>TarEntryOffset</a> of the next entry.</li>
--   </ul>
--   
--   After this action, the <a>Handle</a> position is not in any useful
--   place. If you want to skip to the next entry, take the
--   <a>TarEntryOffset</a> returned and use <a>hReadEntryHeaderOrEof</a>
--   again. Or if having inspected the <a>Entry</a> header you want to read
--   the entry content (if it has one) then use
--   <a>hSeekEntryContentOffset</a> on the original input
--   <a>TarEntryOffset</a>.
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset -> IO (Maybe (Entry, TarEntryOffset))

-- | Set the <a>Handle</a> position to the position corresponding to the
--   given <a>TarEntryOffset</a>.
--   
--   This position is where the entry metadata can be read. If you already
--   know the entry has a body (and perhaps know its length), you may wish
--   to seek to the body content directly using
--   <a>hSeekEntryContentOffset</a>.
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()

-- | Set the <a>Handle</a> position to the entry content position
--   corresponding to the given <a>TarEntryOffset</a>.
--   
--   This position is where the entry content can be read using ordinary
--   I/O operations (though you have to know in advance how big the entry
--   content is). This is <i>only valid</i> if you <i>already know</i> the
--   entry has a body (i.e. is a normal file).
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()

-- | Seek to the end of a tar file, to the position where new entries can
--   be appended, and return that <a>TarEntryOffset</a>.
--   
--   If you have a valid <a>TarIndex</a> for this tar file then you should
--   supply it because it allows seeking directly to the correct location.
--   
--   If you do not have an index, then this becomes an expensive linear
--   operation because we have to read each tar entry header from the
--   beginning to find the location immediately after the last entry (this
--   is because tar files have a variable length trailer and we cannot
--   reliably find that by starting at the end). In this mode, it will fail
--   with an exception if the file is not in fact in the tar format.
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset

-- | Calculate the <a>TarEntryOffset</a> of the next entry, given the size
--   and offset of the current entry.
--   
--   This is much like using <a>skipNextEntry</a> and
--   <a>indexNextEntryOffset</a>, but without using an <a>IndexBuilder</a>.
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset

-- | This is the offset immediately following the last entry in the tar
--   file. This can be useful to append further entries into the tar file.
--   Use with <a>hSeekEntryOffset</a>, or just use
--   <a>hSeekEndEntryOffset</a> directly.
indexEndEntryOffset :: TarIndex -> TarEntryOffset

-- | This is the offset immediately following the entry most recently added
--   to the <a>IndexBuilder</a>. You might use this if you need to know the
--   offsets but don't want to use the <a>TarIndex</a> lookup structure.
--   Use with <a>hSeekEntryOffset</a>. See also <a>nextEntryOffset</a>.
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset


-- | Types and functions to manipulate tar entries.
--   
--   While the <a>Codec.Archive.Tar</a> module provides only the simple
--   high level API, this module provides full access to the details of tar
--   entries. This lets you inspect all the meta-data, construct entries
--   and handle error cases more precisely.
--   
--   This module uses common names and so is designed to be imported
--   qualified:
--   
--   <pre>
--   import qualified Codec.Archive.Tar       as Tar
--   import qualified Codec.Archive.Tar.Entry as Tar
--   </pre>
module Codec.Archive.Tar.Entry

-- | Polymorphic tar archive entry. High-level interfaces commonly work
--   with <a>GenEntry</a> <a>FilePath</a> <a>FilePath</a>, while low-level
--   ones use <a>GenEntry</a> <a>TarPath</a> <a>LinkTarget</a>.
data GenEntry tarPath linkTarget
Entry :: !tarPath -> !GenEntryContent linkTarget -> {-# UNPACK #-} !Permissions -> {-# UNPACK #-} !Ownership -> {-# UNPACK #-} !EpochTime -> !Format -> GenEntry tarPath linkTarget

-- | The path of the file or directory within the archive.
[entryTarPath] :: GenEntry tarPath linkTarget -> !tarPath

-- | The real content of the entry. For <a>NormalFile</a> this includes the
--   file data. An entry usually contains a <a>NormalFile</a> or a
--   <a>Directory</a>.
[entryContent] :: GenEntry tarPath linkTarget -> !GenEntryContent linkTarget

-- | File permissions (Unix style file mode).
[entryPermissions] :: GenEntry tarPath linkTarget -> {-# UNPACK #-} !Permissions

-- | The user and group to which this file belongs.
[entryOwnership] :: GenEntry tarPath linkTarget -> {-# UNPACK #-} !Ownership

-- | The time the file was last modified.
[entryTime] :: GenEntry tarPath linkTarget -> {-# UNPACK #-} !EpochTime

-- | The tar format the archive is using.
[entryFormat] :: GenEntry tarPath linkTarget -> !Format

-- | Monomorphic tar archive entry, ready for serialization /
--   deserialization.
type Entry = GenEntry TarPath LinkTarget

-- | Low-level function to get a native <a>FilePath</a> of the file or
--   directory within the archive, not accounting for long names. It's
--   likely that you want to apply <a>decodeLongNames</a> and use
--   <a>entryTarPath</a> afterwards instead of <a>entryPath</a>.
entryPath :: GenEntry TarPath linkTarget -> FilePath

-- | Polymorphic content of a tar archive entry. High-level interfaces
--   commonly work with <a>GenEntryContent</a> <a>FilePath</a>, while
--   low-level ones use <a>GenEntryContent</a> <a>LinkTarget</a>.
--   
--   Portable archives should contain only <a>NormalFile</a> and
--   <a>Directory</a>.
data GenEntryContent linkTarget
NormalFile :: ByteString -> {-# UNPACK #-} !FileSize -> GenEntryContent linkTarget
Directory :: GenEntryContent linkTarget
SymbolicLink :: !linkTarget -> GenEntryContent linkTarget
HardLink :: !linkTarget -> GenEntryContent linkTarget
CharacterDevice :: {-# UNPACK #-} !DevMajor -> {-# UNPACK #-} !DevMinor -> GenEntryContent linkTarget
BlockDevice :: {-# UNPACK #-} !DevMajor -> {-# UNPACK #-} !DevMinor -> GenEntryContent linkTarget
NamedPipe :: GenEntryContent linkTarget
OtherEntryType :: {-# UNPACK #-} !TypeCode -> ByteString -> {-# UNPACK #-} !FileSize -> GenEntryContent linkTarget

-- | Monomorphic content of a tar archive entry, ready for serialization /
--   deserialization.
type EntryContent = GenEntryContent LinkTarget

-- | Ownership information for <a>GenEntry</a>.
data Ownership
Ownership :: String -> String -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> Ownership

-- | The owner user name. Should be set to <tt>""</tt> if unknown. Must not
--   contain non-ASCII characters.
[ownerName] :: Ownership -> String

-- | The owner group name. Should be set to <tt>""</tt> if unknown. Must
--   not contain non-ASCII characters.
[groupName] :: Ownership -> String

-- | Numeric owner user id. Should be set to <tt>0</tt> if unknown.
[ownerId] :: Ownership -> {-# UNPACK #-} !Int

-- | Numeric owner group id. Should be set to <tt>0</tt> if unknown.
[groupId] :: Ownership -> {-# UNPACK #-} !Int

-- | File size in bytes.
type FileSize = Int64

-- | Permissions information for <a>GenEntry</a>.
type Permissions = FileMode

-- | The number of seconds since the UNIX epoch.
type EpochTime = Int64

-- | Major device number.
type DevMajor = Int

-- | Minor device number.
type DevMinor = Int

-- | User-defined tar format expansion.
type TypeCode = Char

-- | There have been a number of extensions to the tar file format over the
--   years. They all share the basic entry fields and put more meta-data in
--   different extended headers.
data Format

-- | This is the classic Unix V7 tar format. It does not support owner and
--   group names, just numeric Ids. It also does not support device
--   numbers.
V7Format :: Format

-- | The "USTAR" format is an extension of the classic V7 format. It was
--   later standardised by POSIX. It has some restrictions but is the most
--   portable format.
UstarFormat :: Format

-- | The GNU tar implementation also extends the classic V7 format, though
--   in a slightly different way from the USTAR format. This is the only
--   format supporting long file names.
GnuFormat :: Format

-- | An entry with all default values except for the file name and type. It
--   uses the portable USTAR/POSIX format (see <a>UstarFormat</a>).
--   
--   You can use this as a basis and override specific fields, eg:
--   
--   <pre>
--   (simpleEntry name (HardLink target)) { entryTime = mtime }
--   </pre>
simpleEntry :: tarPath -> GenEntryContent linkTarget -> GenEntry tarPath linkTarget

-- | A tar entry for a file.
--   
--   Entry fields such as file permissions and ownership have default
--   values.
--   
--   You can use this as a basis and override specific fields. For example
--   if you need an executable file you could use:
--   
--   <pre>
--   (fileEntry name content) { entryPermissions = executableFilePermissions }
--   </pre>
fileEntry :: tarPath -> ByteString -> GenEntry tarPath linkTarget

-- | A tar entry for a directory.
--   
--   Entry fields such as file permissions and ownership have default
--   values.
directoryEntry :: tarPath -> GenEntry tarPath linkTarget

-- | <a>GNU extension</a> to store a filepath too long to fit into
--   <a>entryTarPath</a> as <a>OtherEntryType</a> <tt>'L'</tt> with the
--   full filepath as <a>entryContent</a>. The next entry must contain the
--   actual data with truncated <a>entryTarPath</a>.
--   
--   See <a>What exactly is the GNU tar ././@LongLink "trick"?</a>
longLinkEntry :: FilePath -> GenEntry TarPath linkTarget

-- | <a>GNU extension</a> to store a link target too long to fit into
--   <a>entryTarPath</a> as <a>OtherEntryType</a> <tt>'K'</tt> with the
--   full filepath as <a>entryContent</a>. The next entry must contain the
--   actual data with truncated <a>entryTarPath</a>.
longSymLinkEntry :: FilePath -> GenEntry TarPath linkTarget

-- | <tt>rw-r--r--</tt> for normal files
ordinaryFilePermissions :: Permissions

-- | <tt>rwxr-xr-x</tt> for executable files
executableFilePermissions :: Permissions

-- | <tt>rwxr-xr-x</tt> for directories
directoryPermissions :: Permissions

-- | Construct a tar entry based on a local file.
--   
--   This sets the entry size, the data contained in the file and the
--   file's modification time. If the file is executable then that
--   information is also preserved. File ownership and detailed permissions
--   are not preserved.
--   
--   <ul>
--   <li>The file contents are read lazily.</li>
--   </ul>
packFileEntry :: FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)

-- | Construct a tar entry based on a local directory (but not its
--   contents).
--   
--   The only attribute of the directory that is used is its modification
--   time. Directory ownership and detailed permissions are not preserved.
packDirectoryEntry :: FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)

-- | Construct a tar entry based on a local symlink.
packSymlinkEntry :: FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
getDirectoryContentsRecursive :: OsPath -> IO [(OsPath, FileType)]

-- | The classic tar format allowed just 100 characters for the file name.
--   The USTAR format extended this with an extra 155 characters, however
--   it uses a complex method of splitting the name between the two
--   sections.
--   
--   Instead of just putting any overflow into the extended area, it uses
--   the extended area as a prefix. The aggravating insane bit however is
--   that the prefix (if any) must only contain a directory prefix. That is
--   the split between the two areas must be on a directory separator
--   boundary. So there is no simple calculation to work out if a file name
--   is too long. Instead we have to try to find a valid split that makes
--   the name fit in the two areas.
--   
--   The rationale presumably was to make it a bit more compatible with old
--   tar programs that only understand the classic format. A classic tar
--   would be able to extract the file name and possibly some dir prefix,
--   but not the full dir prefix. So the files would end up in the wrong
--   place, but that's probably better than ending up with the wrong names
--   too.
--   
--   So it's understandable but rather annoying.
--   
--   <ul>
--   <li>Tar paths use Posix format (ie <tt>'/'</tt> directory separators),
--   irrespective of the local path conventions.</li>
--   <li>The directory separator between the prefix and name is <i>not</i>
--   stored.</li>
--   </ul>
data TarPath

-- | Convert a native <a>FilePath</a> to a <a>TarPath</a>.
--   
--   The conversion may fail if the <a>FilePath</a> is empty or too long.
toTarPath :: Bool -> FilePath -> Either String TarPath

-- | Convert a <a>TarPath</a> to a native <a>FilePath</a>.
--   
--   The native <a>FilePath</a> will use the native directory separator but
--   it is not otherwise checked for validity or sanity. In particular:
--   
--   <ul>
--   <li>The tar path may be invalid as a native path, eg the file name
--   <tt>"nul"</tt> is not valid on Windows.</li>
--   <li>The tar path may be an absolute path or may contain <tt>".."</tt>
--   components. For security reasons this should not usually be allowed,
--   but it is your responsibility to check for these conditions (e.g.,
--   using <a>checkEntrySecurity</a>).</li>
--   </ul>
fromTarPath :: TarPath -> FilePath

-- | Convert a <a>TarPath</a> to a Unix/Posix <a>FilePath</a>.
--   
--   The difference compared to <a>fromTarPath</a> is that it always
--   returns a Unix style path irrespective of the current operating
--   system.
--   
--   This is useful to check how a <a>TarPath</a> would be interpreted on a
--   specific operating system, eg to perform portability checks.
fromTarPathToPosixPath :: TarPath -> FilePath

-- | Convert a <a>TarPath</a> to a Windows <a>FilePath</a>.
--   
--   The only difference compared to <a>fromTarPath</a> is that it always
--   returns a Windows style path irrespective of the current operating
--   system.
--   
--   This is useful to check how a <a>TarPath</a> would be interpreted on a
--   specific operating system, eg to perform portability checks.
fromTarPathToWindowsPath :: TarPath -> FilePath

-- | The tar format allows just 100 ASCII characters for the
--   <a>SymbolicLink</a> and <a>HardLink</a> entry types.
data LinkTarget

-- | Convert a native <a>FilePath</a> to a tar <a>LinkTarget</a>. This may
--   fail if the string is longer than 100 characters or if it contains
--   non-portable characters.
toLinkTarget :: FilePath -> Maybe LinkTarget

-- | Convert a tar <a>LinkTarget</a> to a native <a>FilePath</a>.
fromLinkTarget :: LinkTarget -> FilePath

-- | Convert a tar <a>LinkTarget</a> to a Unix/POSIX <a>FilePath</a>
--   (<tt>'/'</tt> path separators).
fromLinkTargetToPosixPath :: LinkTarget -> FilePath

-- | Convert a tar <a>LinkTarget</a> to a Windows <a>FilePath</a>
--   (<tt>'\\'</tt> path separators).
fromLinkTargetToWindowsPath :: LinkTarget -> FilePath


-- | Perform various checks on tar file entries.
module Codec.Archive.Tar.Check

-- | This function checks a sequence of tar entries for file name security
--   problems. It checks that:
--   
--   <ul>
--   <li>file paths are not absolute</li>
--   <li>file paths do not refer outside of the archive</li>
--   <li>file names are valid</li>
--   </ul>
--   
--   These checks are from the perspective of the current OS. That means we
--   check for "<tt>C:blah</tt>" files on Windows and "/blah" files on
--   Unix. For archive entry types <a>HardLink</a> and <a>SymbolicLink</a>
--   the same checks are done for the link target. A failure in any entry
--   terminates the sequence of entries with an error.
--   
--   Whenever possible, consider fusing <a>checkSecurity</a> with packing /
--   unpacking by using <a>packAndCheck</a> / <a>unpackAndCheck</a> with
--   <a>checkEntrySecurity</a>. Not only is it faster, but it also
--   alleviates issues with lazy I/O such as exhaustion of file handles.
checkSecurity :: Entries e -> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) FileNameError)

-- | Worker of <a>checkSecurity</a>.
checkEntrySecurity :: GenEntry FilePath FilePath -> Maybe FileNameError

-- | Errors arising from tar file names being in some way invalid or
--   dangerous
data FileNameError
InvalidFileName :: FilePath -> FileNameError
AbsoluteFileName :: FilePath -> FileNameError

UnsafeLinkTarget :: FilePath -> FileNameError

-- | This function checks a sequence of tar entries for being a "tar bomb".
--   This means that the tar file does not follow the standard convention
--   that all entries are within a single subdirectory, e.g. a file
--   "foo.tar" would usually have all entries within the "foo/"
--   subdirectory.
--   
--   Given the expected subdirectory, this function checks all entries are
--   within that subdirectory.
--   
--   Note: This check must be used in conjunction with <a>checkSecurity</a>
--   (or <a>checkPortability</a>).
--   
--   Whenever possible, consider fusing <a>checkTarbomb</a> with packing /
--   unpacking by using <a>packAndCheck</a> / <a>unpackAndCheck</a> with
--   <a>checkEntryTarbomb</a>. Not only is it faster, but it also
--   alleviates issues with lazy I/O such as exhaustion of file handles.
checkTarbomb :: FilePath -> Entries e -> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) TarBombError)

-- | Worker of <a>checkTarbomb</a>.
checkEntryTarbomb :: FilePath -> GenEntry FilePath linkTarget -> Maybe TarBombError

-- | An error that occurs if a tar file is a "tar bomb" that would extract
--   files outside of the intended directory.
data TarBombError
TarBombError :: FilePath -> FilePath -> TarBombError

-- | This function checks a sequence of tar entries for a number of
--   portability issues. It will complain if:
--   
--   <ul>
--   <li>The old "Unix V7" or "gnu" formats are used. For maximum
--   portability only the POSIX standard "ustar" format should be
--   used.</li>
--   <li>A non-portable entry type is used. Only ordinary files, hard
--   links, symlinks and directories are portable. Device files, pipes and
--   others are not portable between all common operating systems.</li>
--   <li>Non-ASCII characters are used in file names. There is no agreed
--   portable convention for Unicode or other extended character sets in
--   file names in tar archives.</li>
--   <li>File names that would not be portable to both Unix and Windows.
--   This check includes characters that are valid in both systems and the
--   '/' vs '\' directory separator conventions.</li>
--   </ul>
--   
--   Whenever possible, consider fusing <a>checkPortability</a> with
--   packing / unpacking by using <a>packAndCheck</a> /
--   <a>unpackAndCheck</a> with <a>checkEntryPortability</a>. Not only is
--   it faster, but it also alleviates issues with lazy I/O such as
--   exhaustion of file handles.
checkPortability :: Entries e -> GenEntries FilePath FilePath (Either (Either e DecodeLongNamesError) PortabilityError)

-- | Worker of <a>checkPortability</a>.
checkEntryPortability :: GenEntry FilePath linkTarget -> Maybe PortabilityError

-- | Portability problems in a tar archive
data PortabilityError
NonPortableFormat :: Format -> PortabilityError
NonPortableFileType :: PortabilityError
NonPortableEntryNameChar :: FilePath -> PortabilityError
NonPortableFileName :: PortabilityPlatform -> FileNameError -> PortabilityError

-- | The name of a platform that portability issues arise from
type PortabilityPlatform = String


-- | Reading, writing and manipulating "<tt>.tar</tt>" archive files.
--   
--   This module uses common names and so is designed to be imported
--   qualified:
--   
--   <pre>
--   import qualified Codec.Archive.Tar as Tar
--   </pre>
module Codec.Archive.Tar

-- | Create a new <tt>".tar"</tt> file from a directory of files.
--   
--   It is equivalent to calling the standard <tt>tar</tt> program like so:
--   
--   <pre>
--   $ tar -f tarball.tar -C base -c dir
--   </pre>
--   
--   This assumes a directory <tt>./base/dir</tt> with files inside, eg
--   <tt>./base/dir/foo.txt</tt>. The file names inside the resulting tar
--   file will be relative to <tt>base</tt>, eg <tt>dir/foo.txt</tt>.
--   
--   This is a high level "all in one" operation. Since you may need
--   variations on this function it is instructive to see how it is
--   written. It is just:
--   
--   <pre>
--   import qualified Data.ByteString.Lazy as BL
--   
--   BL.writeFile tar . Tar.write =&lt;&lt; Tar.pack base paths
--   </pre>
--   
--   Notes:
--   
--   The files and directories must not change during this operation or the
--   result is not well defined.
--   
--   The intention of this function is to create tarballs that are portable
--   between systems. It is <i>not</i> suitable for doing file system
--   backups because file ownership and permissions are not fully
--   preserved. File ownership is not preserved at all. File permissions
--   are set to simple portable values:
--   
--   <ul>
--   <li><tt>rw-r--r--</tt> for normal files</li>
--   <li><tt>rwxr-xr-x</tt> for executable files</li>
--   <li><tt>rwxr-xr-x</tt> for directories</li>
--   </ul>
create :: FilePath -> FilePath -> [FilePath] -> IO ()

-- | Extract all the files contained in a <tt>".tar"</tt> file.
--   
--   It is equivalent to calling the standard <tt>tar</tt> program like so:
--   
--   <pre>
--   $ tar -x -f tarball.tar -C dir
--   </pre>
--   
--   So for example if the <tt>tarball.tar</tt> file contains
--   <tt>foo/bar.txt</tt> then this will extract it to
--   <tt>dir/foo/bar.txt</tt>.
--   
--   This is a high level "all in one" operation. Since you may need
--   variations on this function it is instructive to see how it is
--   written. It is just:
--   
--   <pre>
--   import qualified Data.ByteString.Lazy as BL
--   
--   Tar.unpack dir . Tar.read =&lt;&lt; BL.readFile tar
--   </pre>
--   
--   Notes:
--   
--   Extracting can fail for a number of reasons. The tarball may be
--   incorrectly formatted. There may be IO or permission errors. In such
--   cases an exception will be thrown and extraction will not continue.
--   
--   Since the extraction may fail part way through it is not atomic. For
--   this reason you may want to extract into an empty directory and, if
--   the extraction fails, recursively delete the directory.
--   
--   Security: only files inside the target directory will be written.
--   Tarballs containing entries that point outside of the tarball (either
--   absolute paths or relative paths) will be caught and an exception will
--   be thrown.
extract :: FilePath -> FilePath -> IO ()

-- | Append new entries to a <tt>".tar"</tt> file from a directory of
--   files.
--   
--   This is much like <a>create</a>, except that all the entries are added
--   to the end of an existing tar file. Or if the file does not already
--   exist then it behaves the same as <a>create</a>.
append :: FilePath -> FilePath -> [FilePath] -> IO ()

-- | Convert a data stream in the tar file format into an internal data
--   structure. Decoding errors are reported by the <a>Fail</a> constructor
--   of the <a>Entries</a> type.
--   
--   <ul>
--   <li>The conversion is done lazily.</li>
--   </ul>
read :: ByteString -> Entries FormatError

-- | Create the external representation of a tar archive by serialising a
--   list of tar entries.
--   
--   <ul>
--   <li>The conversion is done lazily.</li>
--   </ul>
write :: [Entry] -> ByteString

-- | Creates a tar archive from a list of directories or files. Any
--   directories specified will have their contents included recursively.
--   Paths in the archive will be relative to the given base directory.
--   
--   This is a portable implementation of packing suitable for portable
--   archives. In particular it only constructs <a>NormalFile</a>,
--   <a>Directory</a> and <a>SymbolicLink</a> entries. Hard links are
--   treated like ordinary files. Special files like FIFOs (named pipes),
--   sockets or device files will cause problems.
--   
--   <ul>
--   <li>This function returns results lazily. Subdirectories are scanned
--   and files are read one by one as the list of entries is consumed. Do
--   not change their contents before the output of <a>pack</a> has been
--   consumed in full.</li>
--   </ul>
pack :: FilePath -> [FilePath] -> IO [Entry]

-- | Like <a>pack</a>, but allows specifying additional sanity/security
--   checks on the input filenames. This is useful if you know which check
--   will be used on client side in <a>unpack</a> / <a>unpackAndCheck</a>.
packAndCheck :: (GenEntry FilePath FilePath -> Maybe SomeException) -> FilePath -> [FilePath] -> IO [Entry]

-- | Create local files and directories based on the entries of a tar
--   archive.
--   
--   This is a portable implementation of unpacking suitable for portable
--   archives. It handles <a>NormalFile</a> and <a>Directory</a> entries
--   and has simulated support for <a>SymbolicLink</a> and <a>HardLink</a>
--   entries. Links are implemented by copying the target file. This
--   therefore works on Windows as well as Unix. All other entry types are
--   ignored, that is they are not unpacked and no exception is raised.
--   
--   If the <a>Entries</a> ends in an error then it is raised as an
--   exception. Any files or directories that have been unpacked before the
--   error was encountered will not be deleted. For this reason you may
--   want to unpack into an empty directory so that you can easily clean up
--   if unpacking fails part-way.
--   
--   On its own, this function only checks for security (using
--   <a>checkEntrySecurity</a>). Use <a>unpackAndCheck</a> if you need more
--   checks.
unpack :: Exception e => FilePath -> Entries e -> IO ()

-- | Like <a>unpack</a>, but run custom sanity/security checks instead of
--   <a>checkEntrySecurity</a>. For example,
--   
--   <pre>
--   import Control.Exception (SomeException(..))
--   import Control.Applicative ((&lt;|&gt;))
--   
--   unpackAndCheck (\x -&gt; SomeException &lt;$&gt; checkEntryPortability x
--                     &lt;|&gt; SomeException &lt;$&gt; checkEntrySecurity x) dir entries
--   </pre>
unpackAndCheck :: Exception e => (GenEntry FilePath FilePath -> Maybe SomeException) -> FilePath -> Entries e -> IO ()

-- | Polymorphic tar archive entry. High-level interfaces commonly work
--   with <a>GenEntry</a> <a>FilePath</a> <a>FilePath</a>, while low-level
--   ones use <a>GenEntry</a> <a>TarPath</a> <a>LinkTarget</a>.
data GenEntry tarPath linkTarget

-- | Monomorphic tar archive entry, ready for serialization /
--   deserialization.
type Entry = GenEntry TarPath LinkTarget

-- | Low-level function to get a native <a>FilePath</a> of the file or
--   directory within the archive, not accounting for long names. It's
--   likely that you want to apply <a>decodeLongNames</a> and use
--   <a>entryTarPath</a> afterwards instead of <a>entryPath</a>.
entryPath :: GenEntry TarPath linkTarget -> FilePath

-- | The real content of the entry. For <a>NormalFile</a> this includes the
--   file data. An entry usually contains a <a>NormalFile</a> or a
--   <a>Directory</a>.
entryContent :: GenEntry tarPath linkTarget -> GenEntryContent linkTarget

-- | Polymorphic content of a tar archive entry. High-level interfaces
--   commonly work with <a>GenEntryContent</a> <a>FilePath</a>, while
--   low-level ones use <a>GenEntryContent</a> <a>LinkTarget</a>.
--   
--   Portable archives should contain only <a>NormalFile</a> and
--   <a>Directory</a>.
data GenEntryContent linkTarget
NormalFile :: ByteString -> {-# UNPACK #-} !FileSize -> GenEntryContent linkTarget
Directory :: GenEntryContent linkTarget
SymbolicLink :: !linkTarget -> GenEntryContent linkTarget
HardLink :: !linkTarget -> GenEntryContent linkTarget
CharacterDevice :: {-# UNPACK #-} !DevMajor -> {-# UNPACK #-} !DevMinor -> GenEntryContent linkTarget
BlockDevice :: {-# UNPACK #-} !DevMajor -> {-# UNPACK #-} !DevMinor -> GenEntryContent linkTarget
NamedPipe :: GenEntryContent linkTarget
OtherEntryType :: {-# UNPACK #-} !TypeCode -> ByteString -> {-# UNPACK #-} !FileSize -> GenEntryContent linkTarget

-- | Monomorphic content of a tar archive entry, ready for serialization /
--   deserialization.
type EntryContent = GenEntryContent LinkTarget

-- | Polymorphic sequence of archive entries. High-level interfaces
--   commonly work with <a>GenEntries</a> <a>FilePath</a> <a>FilePath</a>,
--   while low-level ones use <a>GenEntries</a> <a>TarPath</a>
--   <a>LinkTarget</a>.
--   
--   The point of this type as opposed to just using a list is that it
--   makes the failure case explicit. We need this because the sequence of
--   entries we get from reading a tarball can include errors.
--   
--   Converting from a list can be done with just <tt>foldr Next Done</tt>.
--   Converting back into a list can be done with <a>foldEntries</a>;
--   however, in that case you must be prepared to handle the <a>Fail</a>
--   case inherent in the <a>Entries</a> type.
--   
--   The <a>Monoid</a> instance lets you concatenate archives or append
--   entries to an archive.
data GenEntries tarPath linkTarget e
Next :: GenEntry tarPath linkTarget -> GenEntries tarPath linkTarget e -> GenEntries tarPath linkTarget e
Done :: GenEntries tarPath linkTarget e
Fail :: e -> GenEntries tarPath linkTarget e
infixr 5 `Next`

-- | Monomorphic sequence of archive entries, ready for serialization /
--   deserialization.
type Entries e = GenEntries TarPath LinkTarget e

-- | This is like the standard <a>map</a> function on lists, but for
--   <a>Entries</a>. It includes failure as an extra possible outcome of the
--   mapping function.
--   
--   If your mapping function cannot fail it may be more convenient to use
--   <a>mapEntriesNoFail</a>.
mapEntries :: (GenEntry tarPath linkTarget -> Either e' (GenEntry tarPath linkTarget)) -> GenEntries tarPath linkTarget e -> GenEntries tarPath linkTarget (Either e e')

-- | Like <a>mapEntries</a> but the mapping function itself cannot fail.
mapEntriesNoFail :: (GenEntry tarPath linkTarget -> GenEntry tarPath linkTarget) -> GenEntries tarPath linkTarget e -> GenEntries tarPath linkTarget e

-- | This is like the standard <a>foldr</a> function on lists, but for
--   <a>Entries</a>. Compared to <a>foldr</a> it takes an extra function to
--   account for the possibility of failure.
--   
--   This is used to consume a sequence of entries. For example it could be
--   used to scan a tarball for problems or to collect an index of the
--   contents.
foldEntries :: (GenEntry tarPath linkTarget -> a -> a) -> a -> (e -> a) -> GenEntries tarPath linkTarget e -> a

-- | A <a>foldl</a>-like function on Entries. It either returns the final
--   accumulator result, or the failure along with the intermediate
--   accumulator value.
foldlEntries :: (a -> GenEntry tarPath linkTarget -> a) -> a -> GenEntries tarPath linkTarget e -> Either (e, a) a

-- | This is like the standard <a>unfoldr</a> function on lists, but for
--   <a>Entries</a>. It includes failure as an extra possibility that the
--   stepper function may return.
--   
--   It can be used to generate <a>Entries</a> from some other type. For
--   example it is used internally to lazily unfold entries from a
--   <a>ByteString</a>.
unfoldEntries :: (a -> Either e (Maybe (GenEntry tarPath linkTarget, a))) -> a -> GenEntries tarPath linkTarget e

-- | Translate high-level entries with POSIX <a>FilePath</a>s for files and
--   symlinks into entries suitable for serialization by emitting
--   additional <a>OtherEntryType</a> <tt>'K'</tt> and
--   <a>OtherEntryType</a> <tt>'L'</tt> nodes.
--   
--   Input <a>FilePath</a>s must be POSIX file names, not native ones.
encodeLongNames :: GenEntry FilePath FilePath -> [Entry]

-- | Translate low-level entries (usually freshly deserialized) into
--   high-level entries with POSIX <a>FilePath</a>s for files and symlinks
--   by parsing and eliminating <a>OtherEntryType</a> <tt>'K'</tt> and
--   <a>OtherEntryType</a> <tt>'L'</tt> nodes.
--   
--   Resolved <a>FilePath</a>s are still POSIX file names, not native ones.
decodeLongNames :: Entries e -> GenEntries FilePath FilePath (Either e DecodeLongNamesError)

-- | Errors raised by <a>decodeLongNames</a>.
data DecodeLongNamesError

-- | Two adjacent <a>OtherEntryType</a> <tt>'K'</tt> nodes.
TwoTypeKEntries :: DecodeLongNamesError

-- | Two adjacent <a>OtherEntryType</a> <tt>'L'</tt> nodes.
TwoTypeLEntries :: DecodeLongNamesError

-- | <a>OtherEntryType</a> <tt>'K'</tt> node is not followed by a
--   <a>SymbolicLink</a> / <a>HardLink</a>.
NoLinkEntryAfterTypeKEntry :: DecodeLongNamesError

-- | Errors that can be encountered when parsing a Tar archive.
data FormatError
TruncatedArchive :: FormatError
ShortTrailer :: FormatError
BadTrailer :: FormatError
TrailingJunk :: FormatError
ChecksumIncorrect :: FormatError
NotTarFormat :: FormatError
UnrecognisedTarFormat :: FormatError
HeaderBadNumericEncoding :: FormatError
