{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}

{-# OPTIONS_HADDOCK hide #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Avoid restricted function" #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Codec.Archive.Tar
-- Copyright   :  (c) 2007 Bjorn Bringert,
--                    2008 Andrea Vezzosi,
--                    2008-2009, 2012, 2016 Duncan Coutts
-- License     :  BSD3
--
-- Maintainer  :  duncan@community.haskell.org
-- Portability :  portable
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Pack (
    pack,
    packAndCheck,
    packFileEntry,
    packDirectoryEntry,
    packSymlinkEntry,
    longLinkEntry,
  ) where

import Codec.Archive.Tar.LongNames
import Codec.Archive.Tar.PackAscii (filePathToOsPath, osPathToFilePath)
import Codec.Archive.Tar.Types

import Data.Bifunctor (bimap)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Foldable
import System.File.OsPath
import System.OsPath
         ( OsPath, (</>) )
import qualified System.OsPath as FilePath.Native
         ( addTrailingPathSeparator, hasTrailingPathSeparator )
import System.Directory.OsPath
         ( doesDirectoryExist, getModificationTime
         , pathIsSymbolicLink, getSymbolicLinkTarget
         , Permissions(..), getPermissions, getFileSize )
import qualified System.Directory.OsPath.Types as FT
import System.Directory.OsPath.Streaming (getDirectoryContentsRecursive)
import Data.Time.Clock.POSIX
         ( utcTimeToPOSIXSeconds )
import System.IO
         ( IOMode(ReadMode), hFileSize )
import System.IO.Unsafe (unsafeInterleaveIO)
import Control.Exception (throwIO, SomeException)

-- | Creates a tar archive from a list of directory or files. Any directories
-- specified will have their contents included recursively. Paths in the
-- archive will be relative to the given base directory.
--
-- This is a portable implementation of packing suitable for portable archives.
-- In particular it only constructs 'NormalFile', 'Directory' and 'SymbolicLink'
-- entries. Hard links are treated like ordinary files. Special files like
-- FIFOs (named pipes), sockets or device files will cause problems.
--
-- * This function returns results lazily. Subdirectories are scanned
-- and files are read one by one as the list of entries is consumed.
-- Do not change their contents before the output of 'Codec.Archive.Tar.pack' was consumed in full.
--
pack
  :: FilePath   -- ^ Base directory
  -> [FilePath] -- ^ Files and directories to pack, relative to the base dir
  -> IO [Entry]
pack :: FilePath -> [FilePath] -> IO [Entry]
pack = (GenEntry FilePath FilePath -> Maybe SomeException)
-> FilePath -> [FilePath] -> IO [Entry]
packAndCheck (Maybe SomeException
-> GenEntry FilePath FilePath -> Maybe SomeException
forall a b. a -> b -> a
const Maybe SomeException
forall a. Maybe a
Nothing)

-- | Like 'Codec.Archive.Tar.pack', but allows to specify additional sanity/security
-- checks on the input filenames. This is useful if you know which
-- check will be used on client side
-- in 'Codec.Archive.Tar.unpack' / 'Codec.Archive.Tar.unpackAndCheck'.
--
-- @since 0.6.0.0
packAndCheck
  :: (GenEntry FilePath FilePath -> Maybe SomeException)
  -> FilePath   -- ^ Base directory
  -> [FilePath] -- ^ Files and directories to pack, relative to the base dir
  -> IO [Entry]
packAndCheck :: (GenEntry FilePath FilePath -> Maybe SomeException)
-> FilePath -> [FilePath] -> IO [Entry]
packAndCheck GenEntry FilePath FilePath -> Maybe SomeException
secCB (FilePath -> OsString
filePathToOsPath -> OsString
baseDir) ((FilePath -> OsString) -> [FilePath] -> [OsString]
forall a b. (a -> b) -> [a] -> [b]
map FilePath -> OsString
filePathToOsPath -> [OsString]
relpaths) = do
  paths <- OsString -> [OsString] -> IO [OsString]
preparePaths OsString
baseDir [OsString]
relpaths
  entries' <- packPaths baseDir paths
  let entries = (GenEntry OsString OsString -> GenEntry FilePath FilePath)
-> [GenEntry OsString OsString] -> [GenEntry FilePath FilePath]
forall a b. (a -> b) -> [a] -> [b]
map ((OsString -> FilePath)
-> (OsString -> FilePath)
-> GenEntry OsString OsString
-> GenEntry FilePath FilePath
forall a b c d.
(a -> b) -> (c -> d) -> GenEntry a c -> GenEntry b d
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap OsString -> FilePath
osPathToFilePath OsString -> FilePath
osPathToFilePath) [GenEntry OsString OsString]
entries'
  traverse_ (maybe (pure ()) throwIO . secCB) entries
  pure $ concatMap encodeLongNames entries

preparePaths :: OsPath -> [OsPath] -> IO [OsPath]
preparePaths :: OsString -> [OsString] -> IO [OsString]
preparePaths OsString
baseDir = ([[OsString]] -> [OsString]) -> IO [[OsString]] -> IO [OsString]
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[OsString]] -> [OsString]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (IO [[OsString]] -> IO [OsString])
-> ([OsString] -> IO [[OsString]]) -> [OsString] -> IO [OsString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [IO [OsString]] -> IO [[OsString]]
forall a. [IO a] -> IO [a]
interleave ([IO [OsString]] -> IO [[OsString]])
-> ([OsString] -> [IO [OsString]]) -> [OsString] -> IO [[OsString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (OsString -> IO [OsString]) -> [OsString] -> [IO [OsString]]
forall a b. (a -> b) -> [a] -> [b]
map OsString -> IO [OsString]
go
  where
    go :: OsPath -> IO [OsPath]
    go :: OsString -> IO [OsString]
go OsString
relpath = do
      let abspath :: OsString
abspath = OsString
baseDir OsString -> OsString -> OsString
</> OsString
relpath
      isDir  <- OsString -> IO Bool
doesDirectoryExist OsString
abspath
      isSymlink <- pathIsSymbolicLink abspath
      if isDir && not isSymlink then do
        entries <- getDirectoryContentsRecursive abspath
        let entries' = ((OsString, FileType) -> OsString)
-> [(OsString, FileType)] -> [OsString]
forall a b. (a -> b) -> [a] -> [b]
map ((OsString
relpath OsString -> OsString -> OsString
</>) (OsString -> OsString)
-> ((OsString, FileType) -> OsString)
-> (OsString, FileType)
-> OsString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (OsString, FileType) -> OsString
addSeparatorIfDir) [(OsString, FileType)]
entries
        return $ if relpath == mempty
          then entries'
          else FilePath.Native.addTrailingPathSeparator relpath : entries'
      else return [relpath]

    addSeparatorIfDir :: (OsString, FileType) -> OsString
addSeparatorIfDir (OsString
fn, FileType
ty) = case FileType
ty of
      FT.Directory{} -> OsString -> OsString
FilePath.Native.addTrailingPathSeparator OsString
fn
      FileType
_ -> OsString
fn

-- | Pack paths while accounting for overlong filepaths.
packPaths
  :: OsPath
  -> [OsPath]
  -> IO [GenEntry OsPath OsPath]
packPaths :: OsString -> [OsString] -> IO [GenEntry OsString OsString]
packPaths OsString
baseDir [OsString]
paths = [IO (GenEntry OsString OsString)]
-> IO [GenEntry OsString OsString]
forall a. [IO a] -> IO [a]
interleave ([IO (GenEntry OsString OsString)]
 -> IO [GenEntry OsString OsString])
-> [IO (GenEntry OsString OsString)]
-> IO [GenEntry OsString OsString]
forall a b. (a -> b) -> a -> b
$ ((OsString -> IO (GenEntry OsString OsString))
 -> [OsString] -> [IO (GenEntry OsString OsString)])
-> [OsString]
-> (OsString -> IO (GenEntry OsString OsString))
-> [IO (GenEntry OsString OsString)]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (OsString -> IO (GenEntry OsString OsString))
-> [OsString] -> [IO (GenEntry OsString OsString)]
forall a b. (a -> b) -> [a] -> [b]
map [OsString]
paths ((OsString -> IO (GenEntry OsString OsString))
 -> [IO (GenEntry OsString OsString)])
-> (OsString -> IO (GenEntry OsString OsString))
-> [IO (GenEntry OsString OsString)]
forall a b. (a -> b) -> a -> b
$ \OsString
relpath -> do
  let isDir :: Bool
isDir = OsString -> Bool
FilePath.Native.hasTrailingPathSeparator OsString
abspath
      abspath :: OsString
abspath = OsString
baseDir OsString -> OsString -> OsString
</> OsString
relpath
  isSymlink <- OsString -> IO Bool
pathIsSymbolicLink OsString
abspath
  let mkEntry
        | Bool
isSymlink = OsString -> tarPath -> IO (GenEntry tarPath OsString)
forall tarPath.
OsString -> tarPath -> IO (GenEntry tarPath OsString)
packSymlinkEntry'
        | Bool
isDir = OsString -> tarPath -> IO (GenEntry tarPath OsString)
forall tarPath linkTarget.
OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry'
        | Bool
otherwise = OsString -> tarPath -> IO (GenEntry tarPath OsString)
forall tarPath linkTarget.
OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
packFileEntry'
  mkEntry abspath relpath

interleave :: [IO a] -> IO [a]
interleave :: forall a. [IO a] -> IO [a]
interleave = IO [a] -> IO [a]
forall a. IO a -> IO a
unsafeInterleaveIO (IO [a] -> IO [a]) -> ([IO a] -> IO [a]) -> [IO a] -> IO [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [IO a] -> IO [a]
forall a. [IO a] -> IO [a]
go
  where
    go :: [IO a] -> IO [a]
go []     = [a] -> IO [a]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return []
    go (IO a
x:[IO a]
xs) = do
      x'  <- IO a
x
      xs' <- interleave xs
      return (x':xs')

-- | Construct a tar entry based on a local file.
--
-- This sets the entry size, the data contained in the file and the file's
-- modification time. If the file is executable then that information is also
-- preserved. File ownership and detailed permissions are not preserved.
--
-- * The file contents is read lazily.
--
packFileEntry
  :: FilePath -- ^ Full path to find the file on the local disk
  -> tarPath  -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath linkTarget)
packFileEntry :: forall tarPath linkTarget.
FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)
packFileEntry = OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
forall tarPath linkTarget.
OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
packFileEntry' (OsString -> tarPath -> IO (GenEntry tarPath linkTarget))
-> (FilePath -> OsString)
-> FilePath
-> tarPath
-> IO (GenEntry tarPath linkTarget)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> OsString
filePathToOsPath

packFileEntry'
  :: OsPath  -- ^ Full path to find the file on the local disk
  -> tarPath -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath linkTarget)
packFileEntry' :: forall tarPath linkTarget.
OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
packFileEntry' OsString
filepath tarPath
tarpath = do
  mtime   <- OsString -> IO Int64
getModTime OsString
filepath
  perms   <- getPermissions filepath
  -- Get file size without opening it.
  approxSize <- getFileSize filepath

  (content, size) <- if approxSize < 131072
    -- If file is short enough, just read it strictly
    -- so that no file handle dangles around indefinitely.
    then do
      cnt <- readFile' filepath
      pure (BL.fromStrict cnt, fromIntegral $ B.length cnt)
    else do
      hndl <- openBinaryFile filepath ReadMode
      -- File size could have changed between measuring approxSize
      -- and here. Measuring again.
      sz <- hFileSize hndl
      -- Lazy I/O at its best: once cnt is forced in full,
      -- BL.hGetContents will close the handle.
      cnt <- BL.hGetContents hndl
      -- It would be wrong to return (cnt, BL.length sz):
      -- NormalFile constructor below forces size which in turn
      -- allocates entire cnt in memory at once.
      pure (cnt, fromInteger sz)

  pure (simpleEntry tarpath (NormalFile content size))
    { entryPermissions =
      if executable perms then executableFilePermissions else ordinaryFilePermissions
    , entryTime = mtime
    }

-- | Construct a tar entry based on a local directory (but not its contents).
--
-- The only attribute of the directory that is used is its modification time.
-- Directory ownership and detailed permissions are not preserved.
--
packDirectoryEntry
  :: FilePath -- ^ Full path to find the file on the local disk
  -> tarPath  -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry :: forall tarPath linkTarget.
FilePath -> tarPath -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry = OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
forall tarPath linkTarget.
OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry' (OsString -> tarPath -> IO (GenEntry tarPath linkTarget))
-> (FilePath -> OsString)
-> FilePath
-> tarPath
-> IO (GenEntry tarPath linkTarget)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> OsString
filePathToOsPath

packDirectoryEntry'
  :: OsPath  -- ^ Full path to find the file on the local disk
  -> tarPath -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry' :: forall tarPath linkTarget.
OsString -> tarPath -> IO (GenEntry tarPath linkTarget)
packDirectoryEntry' OsString
filepath tarPath
tarpath = do
  mtime   <- OsString -> IO Int64
getModTime OsString
filepath
  return (directoryEntry tarpath) {
    entryTime = mtime
  }

-- | Construct a tar entry based on a local symlink.
--
-- @since 0.6.0.0
packSymlinkEntry
  :: FilePath -- ^ Full path to find the file on the local disk
  -> tarPath  -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath FilePath)
packSymlinkEntry :: forall tarPath.
FilePath -> tarPath -> IO (GenEntry tarPath FilePath)
packSymlinkEntry = (((GenEntry tarPath OsString -> GenEntry tarPath FilePath)
-> IO (GenEntry tarPath OsString) -> IO (GenEntry tarPath FilePath)
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((OsString -> FilePath)
-> GenEntry tarPath OsString -> GenEntry tarPath FilePath
forall a b. (a -> b) -> GenEntry tarPath a -> GenEntry tarPath b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap OsString -> FilePath
osPathToFilePath) (IO (GenEntry tarPath OsString) -> IO (GenEntry tarPath FilePath))
-> (tarPath -> IO (GenEntry tarPath OsString))
-> tarPath
-> IO (GenEntry tarPath FilePath)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((tarPath -> IO (GenEntry tarPath OsString))
 -> tarPath -> IO (GenEntry tarPath FilePath))
-> (OsString -> tarPath -> IO (GenEntry tarPath OsString))
-> OsString
-> tarPath
-> IO (GenEntry tarPath FilePath)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. OsString -> tarPath -> IO (GenEntry tarPath OsString)
forall tarPath.
OsString -> tarPath -> IO (GenEntry tarPath OsString)
packSymlinkEntry') (OsString -> tarPath -> IO (GenEntry tarPath FilePath))
-> (FilePath -> OsString)
-> FilePath
-> tarPath
-> IO (GenEntry tarPath FilePath)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> OsString
filePathToOsPath

packSymlinkEntry'
  :: OsPath  -- ^ Full path to find the file on the local disk
  -> tarPath -- ^ Path to use for the tar 'GenEntry' in the archive
  -> IO (GenEntry tarPath OsPath)
packSymlinkEntry' :: forall tarPath.
OsString -> tarPath -> IO (GenEntry tarPath OsString)
packSymlinkEntry' OsString
filepath tarPath
tarpath = do
  linkTarget <- OsString -> IO OsString
getSymbolicLinkTarget OsString
filepath
  pure $ symlinkEntry tarpath linkTarget

getModTime :: OsPath -> IO EpochTime
getModTime :: OsString -> IO Int64
getModTime OsString
path = do
  -- The directory package switched to the new time package
  t <- OsString -> IO UTCTime
getModificationTime OsString
path
  return . floor . utcTimeToPOSIXSeconds $ t