#!/bin/sh
# minc-core : Mini-container core script in new world
#
# Copyright (C) 2017 Masami Hiramatsu <masami.hiramatsu@gmail.com>
# This program is released under the MIT License, see LICENSE.

# Paths of the helper scripts this script runs or sources.
LIBEXEC=/usr/libexec/mincs
MINCCOAT=$LIBEXEC/minc-coat
MINCLEASH=$LIBEXEC/minc-leash
MINCMOULT=$LIBEXEC/minc-moult

# Exit if any errors
set -e
# A non-empty MINC_DEBUG enables command tracing. The ": 'text'" no-op
# commands used throughout this script then appear in the trace as markers.
test "$MINC_DEBUG" && set -x

# Every path used from here on is built from MINC_TMPDIR. Stop right away
# when it is missing; otherwise those paths would collapse to /pid, /root, ...
: "${MINC_TMPDIR:?MINC_TMPDIR must be set}"
:
: 'Get the PID in parent namespace from procfs'
: '(At this point, we still have the procfs in original namespace)'
:
# Field 4 of /proc/self/stat is the parent PID of the process reading it
# (here: cut). Presumably that parent is this shell - TODO confirm for the
# shells this script is run with.
export MINC_PID=`cut -f 4 -d" " /proc/self/stat`
echo "$MINC_PID" > "$MINC_TMPDIR/pid"

# Set the host name. A non-empty MINC_UTSNAME is applied and also saved to
# $MINC_TMPDIR/utsname; otherwise a name found in that file is applied.
# When neither is available the host name is left untouched.
# NOTE(review): presumably this runs inside a new UTS namespace, so the host's
# own name is not affected - confirm against the caller.
if [ "$MINC_UTSNAME" ]; then
  :;: 'Setup utsname for this container';:
  hostname $MINC_UTSNAME
  echo $MINC_UTSNAME > $MINC_TMPDIR/utsname
elif [ -f $MINC_TMPDIR/utsname ]; then
  :;: 'Setup utsname from existing container image';:
  # The expansions are unquoted: an empty file makes this a plain "hostname"
  # call without arguments rather than a call with an empty name.
  hostname `cat $MINC_TMPDIR/utsname`
fi

:;: 'Make current mount namespace private';:
# Recursively mark every mount below / as private, so the mounts made by this
# script are not propagated to other mount namespaces.
mount --make-rprivate /

:;: 'Do not update /etc/mtab since the mount is private';:
# Exported, so it is also seen by the mount commands run by the helpers.
export LIBMOUNT_MTAB=/proc/mounts

:;: 'Setup overlay rootfs by minc-coat';:
# NOTE(review): minc-coat is expected to provide the container root at
# $MINC_TMPDIR/root (used as RD below) - confirm in minc-coat.
# MINC_BASEDIR is passed unquoted: when empty, no third argument is given.
$MINCCOAT bind $MINC_TMPDIR $MINC_BASEDIR

:;: 'Prepare root directory';:
# RD is the container root directory; all later mounts are made below it.
RD=$MINC_TMPDIR/root
# Make sure the mount points used later in this script exist.
mkdir -p $RD/etc $RD/dev $RD/sys $RD/proc

# bindmount PATH
#   Bind-mount host PATH onto the same path below $RD.
#   A PATH that does not exist on the host is silently skipped (status 0).
#   A missing mount point is created first: a directory (with parents) when
#   PATH is a directory, otherwise an empty file in a directory that is
#   created on demand.
#   Returns the status of the failing mkdir/touch, or of mount.
bindmount() { # dir
  # Nothing to bind is not an error.
  [ -e "$1" ] || return 0
  if [ ! -e "$RD$1" ]; then
    # The mount point has to be of the same kind as the source: a directory
    # can not be bound onto a regular file and vice versa.
    if [ -d "$1" ]; then
      mkdir -p "$RD$1" || return
    else
      mkdir -p "$(dirname "$RD$1")" || return
      touch "$RD$1" || return
    fi
  fi
  mount --bind "$1" "$RD$1"
}

# bindmounts PATH...
#   Call bindmount for every argument, in order. Each argument is passed on
#   unchanged (no word splitting or glob expansion).
#   Returns the status of the last bindmount call, or 0 without arguments.
bindmounts() { # dir(s)
  for d in "$@"; do bindmount "$d"; done
}

# bindmount2 HOSTPATH CONTAINERPATH
#   Bind-mount HOSTPATH onto $RD/CONTAINERPATH.
#   The mount point is created when needed: a directory for a directory
#   source, an empty file (in a directory created on demand) for a regular
#   file source.
#   A symbolic link at the mount point is replaced by a fresh mount point,
#   unless MINC_DIRECT is set; then an error is printed to stderr and the
#   link is left alone.
#   Returns 1 for a symbolic link under MINC_DIRECT, non-zero if HOSTPATH
#   does not exist, otherwise the status of mount.
bindmount2() { # hostpath containerpath
  if [ -L "$RD$2" ]; then
    if [ "$MINC_DIRECT" ]; then
      echo "Error: $2 is a symbolic link in container rootfs."\
           " We can not bind mount on it." >&2
      # Exit statuses are 0-255; a negative value is rejected by POSIX shells.
      return 1
    fi
    # Replace the link by a mount point of the same kind as the source.
    rm "$RD$2"
    if [ -d "$1" ]; then mkdir -p "$RD$2"; else touch "$RD$2"; fi
  elif [ -d "$1" ]; then
    mkdir -p "$RD$2"
  fi
  if [ -f "$1" ] && [ ! -f "$RD$2" ]; then
    mkdir -p "$(dirname "$RD$2")"
    touch "$RD$2"
  fi
  # A missing source makes this list, and so the function, return non-zero.
  [ -e "$1" ] && mount --bind "$1" "$RD$2"
}

# user_bindmount
#   Bind-mount the user-defined entries listed in MINC_RWBIND.
#   MINC_RWBIND is a whitespace separated list of HOSTPATH:CONTAINERPATH
#   entries; each entry is split at its first colon and handed to bindmount2.
#   An entry without a colon is passed to bindmount2 as its only argument.
#   Does nothing when MINC_RWBIND is empty.
user_bindmount() {
  [ "$MINC_RWBIND" ] || return 0
  :;: 'Bind user-defined directories';:
  # Unquoted on purpose: the list is split into entries at whitespace.
  for i in $MINC_RWBIND; do
    case "$i" in
      *:*) bindmount2 "${i%%:*}" "${i#*:}" ;;
      *)   bindmount2 "$i" ;;
    esac
  done
}

:;: "Setup name resolvers" ;:
# Bind $MINC_TMPDIR/resolv.conf and $MINC_TMPDIR/hosts over the files of the
# same name in the container's /etc.
bindmount2 "$MINC_TMPDIR/resolv.conf" /etc/resolv.conf
bindmount2 "$MINC_TMPDIR/hosts" /etc/hosts

if [ -x "$MINC_CROSS_QEMU" ]; then
  :;: 'Bind qemu-user-mode for cross-arch container';:
  # Create the directory the emulator lives in, wherever that is, so that
  # the placeholder file below can always be created.
  mkdir -p "$(dirname "$RD/$MINC_CROSS_QEMU")"
  # Placeholder to bind the host's emulator binary onto.
  [ -x "$RD/$MINC_CROSS_QEMU" ] || touch "$RD/$MINC_CROSS_QEMU"
  bindmount "$MINC_CROSS_QEMU"
fi

if [ "$MINC_QEMU" ]; then
  :
  : 'Enter qemu-system or user-mode-linux container (ermine)'
  :
  # NOTE(review): sourcing minc-moult is expected to define minc_moult,
  # which is called at the end of this branch - confirm in minc-moult.
  . "$MINCMOULT"
  # Options for the minc command that run.sh starts.
  MINC_GUEST_OPT="-r /mnt/root --name `hostname`"
  [ "$MINC_USE_DEV" ] && MINC_GUEST_OPT="$MINC_GUEST_OPT --usedev"

  :;: 'Since host mounts overlayfs on rootfs, guest skips it.';:
  MINC_GUEST_OPT="$MINC_GUEST_OPT -D"
  [ "$MINC_DEBUG" ] && MINC_GUEST_OPT="$MINC_GUEST_OPT --debug"
  # The command line is appended as ONE double-quoted word. "$*" is the
  # explicit way to join the arguments into a single string.
  [ $# -ne 0 ] && MINC_GUEST_OPT="$MINC_GUEST_OPT \"$*\""

  :;: 'Prepare run.sh for qemu/um internal container';:
  # The terminal size is sampled now and written into the script as fixed
  # stty commands.
  {
    echo "#!/bin/sh"
    echo "stty rows `tput lines`; stty cols `tput cols`"
    echo "minc $MINC_GUEST_OPT"
  } > "$MINC_TMPDIR/run.sh"

  user_bindmount

  minc_moult "$MINC_ARCH" "$MINC_TMPDIR" "ro quiet $MINC_KERNEL_OPT"
  # Never fall through to the namespace setup below.
  exit $? # failsafe
fi

# Replace an existing /etc/mtab in the container root that is not already a
# symbolic link by a link to /proc/mounts.
if [ -e "$RD/etc/mtab" ] && [ ! -L "$RD/etc/mtab" ]; then
  :;: 'Hide /etc/mtab from this namespace';:
  rm -f "$RD/etc/mtab"
  ln -s /proc/mounts "$RD/etc/mtab"
fi

:;: 'Prepare special device files';:
# Two ways to populate the container's /dev: mount devtmpfs on it when
# MINC_USE_DEV is set, otherwise build a small /dev on a tmpfs by hand.
if [ "$MINC_USE_DEV" ]; then
  :;: 'Directly use devtmpfs for new /dev';:
  mount -t devtmpfs devtmpfs $RD/dev
else
  :;: 'Make a fake /dev directory';:
  mount -t tmpfs tmpfs $RD/dev
  mkdir $RD/dev/pts

  :;: "Get tty or output file" ;:
  # tty fails when stdin is not a terminal; in that case use whatever
  # file descriptor 1 of process $MINC_PID points to.
  TTY=`tty` || TTY=`readlink /proc/$MINC_PID/fd/1`
  if [ "$MINC_OPT_PTY" ]; then
    :;: 'This is just a quick hack to show tty in non priv container';:
    # Bind the tty itself and the host's ptmx at the same paths below $RD.
    touch $RD$TTY; bindmount $TTY
    touch $RD/dev/pts/ptmx; bindmount /dev/pts/ptmx
  else
    :;: 'Hack the tty bind to /dev/console and set it';:
    touch $RD/dev/console
    bindmount2 $TTY /dev/console
    # NOTE(review): this queries /dev/console as seen by this script, not
    # $RD/dev/console which was just bound above, and a failure aborts the
    # script because of set -e - confirm this is intended.
    stty -F /dev/console > /dev/null
    :;: 'And make a new instance of devpts';:
    mount devpts -t devpts -onoexec,nosuid,gid=5,mode=0620,newinstance,ptmxmode=0666 $RD/dev/pts
  fi
  # /dev/ptmx in the container points at the ptmx set up in /dev/pts above.
  ln -s /dev/pts/ptmx $RD/dev/ptmx

  :;: 'Bind fundamental device files to new /dev';:
  bindmounts /dev/null /dev/zero /dev/random /dev/urandom
  # Only when /dev/mqueue is a directory here: create the mount point and bind it.
  test -d /dev/mqueue && mkdir $RD/dev/mqueue && bindmount /dev/mqueue
  # TODO: Add other devices here
fi

:;: 'Do not bind procfs, since it shows outside pids';:
# Mount a read-only proc on this script's own /proc and a read-write one on
# the container's /proc.
mount -t proc -o ro,nosuid,nodev,noexec proc /proc
mount -t proc -o rw,nosuid,nodev,noexec,relatime proc "$RD/proc"
# These entries are then bound from the /proc mounted read-only above onto
# the container's proc - presumably so that they stay read-only inside the
# container (TODO confirm).
bindmounts /proc/sys /proc/sysrq-trigger /proc/irq /proc/bus
# /sys is bound only when MINC_NOPRIV is empty.
[ -z "$MINC_NOPRIV" ] && bindmount /sys

:;: '/tmp is used for application working area in container';:
# The root filesystem may come without /tmp; create the mount point like the
# other mount points this script relies on.
mkdir -p "$RD/tmp"
mount -t tmpfs tmpfs "$RD/tmp"

user_bindmount

:;: 'Exec leash (chroot/capsh) to run command in new rootfs';:
# Replaces this shell; the script's arguments are passed through unchanged.
exec "$MINCLEASH" "$RD" "$@"
