#!/usr/bin/python3

import abc
import argparse
import ast
import atexit
import glob
import inspect
import os
import os.path
import re
import shutil
import sys

sys.path.insert(0, ("/usr/lib64/charliecloud"))
import charliecloud as ch


## Globals ##

# Command line arguments. Assigned in main() from the argparse result; the
# instruction classes read it (e.g. cli.dry_run, cli.context, cli.build_arg).
cli = None

# Environment object. Assigned in main() to an Environment instance; holds
# the ARG and ENV variables and the working directory during the build.
env = None

# Image that we are building. Assigned in main() to a ch.Image.
image = None


## Imports not in standard library ##

# See charliecloud.py for the messy import of this.
lark = ch.lark


## Constants ##

# Absolute path of the directory containing this script, derived from the
# filename of the currently executing frame.
CH_BIN = os.path.dirname(os.path.abspath(
           inspect.getframeinfo(inspect.currentframe()).filename))
# ch-run executable expected next to this script; main() records it as a
# missing dependency if it does not exist.
CH_RUN = CH_BIN + "/ch-run"

# Default ARG variables. The proxy entries are copied from this process's
# environment; entries that come back None (not set) are dropped by
# Environment.reset().
ARG_DEFAULTS = { "HTTP_PROXY": os.environ.get("HTTP_PROXY"),
                 "HTTPS_PROXY": os.environ.get("HTTPS_PROXY"),
                 "FTP_PROXY": os.environ.get("FTP_PROXY"),
                 "NO_PROXY": os.environ.get("NO_PROXY"),
                 "http_proxy": os.environ.get("http_proxy"),
                 "https_proxy": os.environ.get("https_proxy"),
                 "ftp_proxy": os.environ.get("ftp_proxy"),
                 "no_proxy": os.environ.get("no_proxy"),
                 "PATH": "/ch/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                 # GNU tar, when it thinks it's running as root, tries to
                 # chown(2) and chgrp(2) files to whatever's in the tarball.
                 "TAR_OPTIONS": "--no-same-owner" }

# Default ENV variables; Environment.reset() starts from these. Currently
# there are none.
ENV_DEFAULTS = { }


## Main ##

def main():
   """Entry point: parse the command line and Dockerfile, then build.

   Sets the module globals cli, image, and env, parses the Dockerfile with
   Lark, and runs every instruction through Main_Loop. Exits via ch.FATAL()
   or sys.exit() on error, after --parse-only, or when called with no
   arguments."""

   atexit.register(ch.color_reset, sys.stdout, sys.stderr)

   if (not os.path.exists(CH_RUN)):
      ch.depfails.append(("missing", CH_RUN))

   global cli
   ap = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description="Build an image from a Dockerfile; completely unprivileged.",
      epilog="""\
Storage directory is used for caching and temporary images. Location: first
defined of --storage, $CH_GROW_STORAGE, and %s.
""" % ch.storage_default())
   ap.add_argument("--build-arg", action="append", default=None,
                   metavar="KEY=VALUE",
                   help="set build-time variables")
   ap.add_argument("--dependencies", action=ch.CLI_Dependencies,
                   help="print any missing dependencies and exit")
   ap.add_argument("-f", "--file", metavar="DOCKERFILE",
                   help="Dockerfile to use (default: CONTEXT/Dockerfile)")
   ap.add_argument("--list", action=CLI_List_Images,
                   help="List images ch-grow has built")
   ap.add_argument("-n", "--dry-run", action="store_true",
                   help="don't execute instructions")
   ap.add_argument("--no-cache", action="store_true",
                   help="ignored (layer caching not yet supported)")
   ap.add_argument("--parse-only", action="store_true",
                   help="stop after parsing the Dockerfile")
   ap.add_argument("--print-storage", action=CLI_Print_Storage,
                   help="print the state and images directory, then exit")
   ap.add_argument("-s", "--storage", metavar="DIR", default=ch.storage_env(),
                   help="builder internal storage directory")
   ap.add_argument("-t", "--tag", metavar="TAG",
                   help="name of image to create (default: inferred)")
   ap.add_argument("-v", "--verbose", action="count", default=0,
                   help="print extra chatter (can be repeated)")
   ap.add_argument("--version", action=ch.CLI_Version,
                   help="print version and exit")
   ap.add_argument("context", metavar="CONTEXT",
                   help="context directory")

   if (len(sys.argv) < 2):
      ap.print_help(file=sys.stderr)
      sys.exit(1)

   cli = ap.parse_args()
   ch.verbose = cli.verbose
   if (cli.file is None):
      cli.file = cli.context + "/Dockerfile"
   if (cli.tag is None):
      # Infer the tag from the Dockerfile path: prefer the extension
      # ("Dockerfile.foo" -> "foo"), else the containing directory's name.
      m = re.search(r"(([^/]+)/)?Dockerfile(\.(.+))?$",
                    os.path.abspath(cli.file))
      if (m is not None):
         if m.group(4):    # extension
            cli.tag = m.group(4)
         elif m.group(2):  # containing directory
            cli.tag = m.group(2)
   def build_arg_get(arg):
      # Split on the first "=" only, so that values may themselves contain
      # "=" (e.g. "--build-arg OPTS=a=b"). With no "=" at all, take the value
      # from our own environment.
      (key, sep, value) = arg.partition("=")
      if (sep != ""):
         return (key, value)
      value = os.getenv(key)
      if (value is None):
         ch.FATAL("--build-arg: %s: no value and not in environment" % key)
      return (key, value)
   if (cli.build_arg is None):
      cli.build_arg = list()
   cli.build_arg = dict( build_arg_get(i) for i in cli.build_arg )

   ch.dependencies_check()

   global image
   image = ch.Image(ch.Image_Ref(cli.tag), cli.storage + "/dlcache",
                    cli.storage + "/img")
   ch.INFO("growing: %s" % image)
   ch.DEBUG("image path: %s" % image.unpack_path)
   ch.DEBUG(cli)

   global env
   env = Environment()

   parser = lark.Lark("?start: dockerfile\n" + ch.GRAMMAR,
                      parser="earley", propagate_positions=True)
   if (cli.file == "-"):
      text = sys.stdin.read()
   else:
      # Context manager so the Dockerfile is closed promptly.
      with open(cli.file, "rt") as fp:
         text = fp.read()
   # Avoid Lark issue #237: lark.exceptions.UnexpectedEOF if the file does not
   # end in newline.
   text += "\n"
   try:
      tree = parser.parse(text)
   except lark.exceptions.UnexpectedInput as x:
      ch.DEBUG(x)  # noise about what was expected in the grammar
      ch.FATAL("can't parse: %s:%d,%d\n\n%s" % (cli.file, x.line, x.column, x.get_context(text, 39)))

   ch.DEBUG(tree.pretty())
   if (cli.parse_only):
      sys.exit(0)

   # Guess whether the context is a URL, and error out if so. This can be a
   # typical looking URL e.g. "https://..." or also something like
   # "git@github.com:...". The line noise in the second line of the regex is
   # to match this second form. Username and host characters from
   # https://tools.ietf.org/html/rfc3986.
   #
   # Note the "+" in "git+ssh" must be escaped; unescaped it is a quantifier
   # and the scheme "git+ssh://" is not recognized.
   if (re.search(r"""  ^((git|git\+ssh|http|https|ssh)://
                     | ^[\w.~%!$&'\(\)\*\+,;=-]+@[\w.~%!$&'\(\)\*\+,;=-]+:)""",
                 cli.context, re.VERBOSE) is not None):
      ch.FATAL("not yet supported: issue #773: URL context: %s" % cli.context)
   if (os.path.exists(cli.context + "/.dockerignore")):
      ch.WARNING("not yet supported, ignored: issue #777: .dockerignore file")

   # We do not actually care whether the tree is traversed breadth-first or
   # depth-first, but we *do* care that the nodes are visited in order.
   # Neither visit() nor visit_topdown() are documented as of 2020-06-11 [1],
   # but examining the source code [2] shows that visit_topdown() uses
   # Tree.iter_trees_topdown(), which *is* documented to be in-order [3].
   #
   # This change seems to have been made in 0.8.6 (see PR #761); before then,
   # visit() was in order. Therefore, we call that instead, if visit_topdown()
   # is not present, to improve compatibility (see issue #792).
   #
   # [1]: https://lark-parser.readthedocs.io/en/latest/visitors/#visitors
   # [2]: https://github.com/lark-parser/lark/blob/445c8d4/lark/visitors.py#L211
   # [3]: https://lark-parser.readthedocs.io/en/latest/classes/#tree
   ml = Main_Loop()
   if (hasattr(ml, 'visit_topdown')):
      ml.visit_topdown(tree)
   else:
      ml.visit(tree)

   # Arg instructions delete the build args they use, so anything left over
   # was never referenced by the Dockerfile.
   if (len(cli.build_arg) != 0):
      ch.FATAL("--build-arg: not consumed: " + " ".join(cli.build_arg.keys()))

   if (ml.instruction_ct == 0):
      ch.FATAL("no instructions found: %s" % cli.file)
   ch.INFO("done; %d instructions executed" % ml.instruction_ct)


class Main_Loop(lark.Visitor):
   """Visitor that instantiates, announces, and executes an instruction
   object for each tree node that has a matching I_<rule> class in this
   module, counting instructions as it goes."""

   def __init__(self, *args, **kwargs):
      self.instruction_ct = 0
      super().__init__(*args, **kwargs)

   def __default__(self, tree):
      # Nodes with no corresponding instruction class are not instructions.
      inst_class = globals().get("I_" + tree.data)
      if (inst_class is None):
         return
      inst = inst_class(tree)
      inst.announce()
      # Until something has been counted, only directives and FROM pass
      # silently; ARG gets a warning and anything else is fatal.
      if (    self.instruction_ct == 0
          and not isinstance(inst, (I_directive, I_from_))):
         if (isinstance(inst, Arg)):
            ch.WARNING("ARG before FROM not yet supported; see issue #779")
         else:
            ch.FATAL("first instruction must be ARG or FROM")
      inst.execute()
      self.instruction_ct += inst.execute_increment


## Instruction classes ##

class Instruction(abc.ABC):
   """Abstract base for Dockerfile instructions.

   Subclasses provide execute_() (do the work) and str_() (describe the
   arguments). Class names are expected to look like I_<name>_..., because
   str_name() derives the instruction name from the class name."""

   # Amount added to the main loop's instruction count per instance.
   execute_increment = 1

   def __init__(self, tree):
      self.lineno = tree.meta.line
      self.options = {}
      for opt in ch.tree_children(tree, "option"):
         key = ch.tree_terminal(opt, "OPTION_KEY")
         value = ch.tree_terminal(opt, "OPTION_VALUE")
         if (key in self.options):
            ch.FATAL("%3d %s: repeated option --%s"
                     % (self.lineno, self.str_name(), key))
         self.options[key] = value
      self.tree = tree

   def __str__(self):
      # Line number, name, options (only if any), then the arguments.
      opts = self.str_options()
      fields = ["%3s" % self.lineno, self.str_name()]
      if (opts != ""):
         fields.append(opts)
      fields.append(self.str_())
      return " ".join(fields)

   def announce(self):
      "Log this instruction."
      ch.INFO(self)

   def execute(self):
      "Run execute_() unless this is a dry run."
      if (cli.dry_run):
         return
      self.execute_()

   @abc.abstractmethod
   def execute_(self):
      ...

   def options_assert_empty(self):
      "Fatal error, naming the first leftover option, if any options remain."
      if (len(self.options) > 0):
         leftover = next(iter(self.options))
         ch.FATAL("%s: invalid option --%s" % (self.str_name(), leftover))

   @abc.abstractmethod
   def str_(self):
      ...

   def str_name(self):
      "Instruction name: second underscore-separated word of the class name."
      words = type(self).__name__.split("_")
      return words[1].upper()

   def str_options(self):
      "Options rendered as space-separated --key=value."
      rendered = ["--%s=%s" % pair for pair in self.options.items()]
      return " ".join(rendered)

   def unsupported_forever_warn(self, msg):
      text = "not supported, ignored: %s %s" % (self.str_name(), msg)
      ch.WARNING(text)

   def unsupported_yet_warn(self, msg, issue_no):
      text = ("not yet supported, ignored: issue #%d: %s %s"
              % (issue_no, self.str_name(), msg))
      ch.WARNING(text)

   def unsupported_yet_fatal(self, msg, issue_no):
      text = ("not yet supported: issue #%d: %s %s"
              % (issue_no, self.str_name(), msg))
      ch.FATAL(text)


class Instruction_Supported_Never(Instruction):
   """Instruction that will never be supported: announcing it produces a
   warning, executing it does nothing, and it does not count toward the
   number of instructions."""

   execute_increment = 0

   def execute_(self):
      return None

   def str_(self):
      return "(unsupported)"

   def announce(self):
      self.unsupported_forever_warn("instruction")


class Arg(Instruction):
   """Common code for the ARG forms. Subclasses supply value_default(),
   which gives the value to use when --build-arg did not provide one."""

   def __init__(self, *args):
      super().__init__(*args)
      self.key = ch.tree_terminal(self.tree, "WORD", 0)
      if (self.key not in cli.build_arg):
         self.value = self.value_default()
      else:
         # A command line value wins. Remove it so that main() can detect
         # build args that no ARG instruction consumed.
         self.value = cli.build_arg.pop(self.key)
      if (self.value is not None):
         self.value = variables_sub(self.value, env.env_build)

   def str_(self):
      return (self.key if self.value is None
              else "%s='%s'" % (self.key, self.value))

   def execute_(self):
      # An ARG without a value defines nothing.
      if (self.value is None):
         return
      env.arg[self.key] = self.value


class I_arg_bare(Arg):
   "ARG with a name only; it has no default value."

   # Construction is inherited unchanged from Arg.

   def value_default(self):
      return None


class I_arg_equals(Arg):
   "ARG of the form NAME=VALUE, where VALUE is a bare word or quoted string."

   # Construction is inherited unchanged from Arg.

   def value_default(self):
      word = ch.tree_terminal(self.tree, "WORD", 1)
      if (word is not None):
         return word
      # No second bare word, so the value is the quoted string.
      return unescape(ch.tree_terminal(self.tree, "STRING_QUOTED"))


class I_copy(Instruction):
   """COPY instruction, shell form only: copy one or more sources from the
   context directory into the image.

   Sources are glob patterns relative to the context; the last path given is
   the destination, taken relative to the current WORKDIR unless absolute."""

   def __init__(self, *args):
      super().__init__(*args)
      if (ch.tree_child(self.tree, "copy_shell") is not None):
         paths = [variables_sub(i, env.env_build)
                  for i in ch.tree_child_terminals(self.tree, "copy_shell",
                                                   "WORD")]
         self.srcs = paths[:-1]
         self.dst = paths[-1]
      else:
         assert (ch.tree_child(self.tree, "copy_list") is not None)
         self.unsupported_yet_fatal("list form", 784)

   def str_(self):
      return "%s -> %s" % (self.srcs, repr(self.dst))

   def execute_(self):
      # Complain about unsupported stuff.
      if (self.options.pop("chown", False)):
         self.unsupported_forever_warn("--chown")
      if (self.options.pop("from", False)):
         self.unsupported_yet_fatal("--from", 768)
      # Any remaining options are invalid.
      self.options_assert_empty()
      # The Dockerfile specification for COPY is complex, messy, inexplicably
      # different from cp(1), and incomplete. We try to be bug-compatible with
      # Docker but probably are not 100%. See the FAQ.
      #
      # None of this abstracted into a general copy function because the
      # semantics are really weird, so I don't want people calling it except
      # from here.
      srcs = list()
      for src in self.srcs:
         # A normalized path climbs out of the context only if its first
         # component is exactly "..". Testing the bare string prefix would
         # also reject legitimate names such as "..foo" or "...".
         src_norm = os.path.normpath(src)
         if (src_norm == ".." or src_norm.startswith("../")):
            ch.FATAL("can't COPY: %s climbs outside context" % src)
         srcs.extend(glob.glob(cli.context + "/" + src))
      if (len(srcs) == 0):
         ch.FATAL("can't COPY: no sources exist")
      dst = image.unpack_path + "/"
      if (not self.dst.startswith("/")):
         dst += env.workdir + "/"
      dst += self.dst
      if (dst.endswith("/") or len(srcs) > 1 or os.path.isdir(srcs[0])):
         # Create destination directory.
         if (dst.endswith("/")):
            dst = dst[:-1]
         if (os.path.exists(dst) and not os.path.isdir(dst)):
            ch.FATAL("can't COPY: %s exists but is not a directory" % dst)
         ch.mkdirs(dst)
      # The context does not change between sources, so resolve it once.
      context_real = os.path.realpath(cli.context)
      for src in srcs:
         # Check for symlinks to outside context.
         src_real = os.path.realpath(src)
         if (not os.path.commonpath([src_real, context_real]) \
                 .startswith(context_real)):
            ch.FATAL("can't COPY: %s climbs outside context via symlink" % src)
         # Do the copy.
         if (os.path.isfile(src)):   # or symlink to file
            ch.DEBUG("COPY via copy2 file %s to %s" % (src, dst))
            shutil.copy2(src, dst, follow_symlinks=True)
         elif (os.path.isdir(src)):  # or symlink to directory
            # Copy *contents* of src, not src itself. Note: shutil.copytree()
            # has a parameter dirs_exist_ok that I think will make this easier
            # in Python 3.8.
            ch.DEBUG("COPY dir %s to %s" % (src, dst))
            if (not os.path.isdir(dst)):
               ch.FATAL("can't COPY: destination not a directory: %s to %s"
                        % (src, dst))
            for src2_basename in os.listdir(src):
               src2 = src + "/" + src2_basename
               if (os.path.islink(src2)):
                  # Symlinks within directories do not get dereferenced.
                  ch.DEBUG("symlink via copy2: %s to %s" % (src2, dst))
                  shutil.copy2(src2, dst, follow_symlinks=False)
               elif (os.path.isfile(src2)):  # not symlink to file
                  ch.DEBUG("file via copy2: %s to %s" % (src2, dst))
                  shutil.copy2(src2, dst)
               elif (os.path.isdir(src2)):   # not symlink to directory
                  dst2 = dst + "/" + src2_basename
                  ch.DEBUG("directory via copytree: %s to %s" % (src2, dst2))
                  shutil.copytree(src2, dst2, symlinks=True,
                                  ignore_dangling_symlinks=True)
               else:
                  ch.FATAL("can't COPY unknown file type: %s" % src2)
         else:
            ch.FATAL("can't COPY unknown file type: %s" % src)


class I_directive(Instruction_Supported_Never):
   "Parser directive; these are ignored with a warning."

   # Construction is inherited unchanged from the base class.

   def announce(self):
      ch.WARNING("not supported, ignored: parser directives")


class Env(Instruction):
   """Common code for the ENV forms. Subclasses set self.key and
   self.value."""

   def execute_(self):
      env.env[self.key] = self.value
      # Rewrite the image's environment file in full with every ENV variable
      # set so far, one KEY=VALUE per line.
      env_path = image.unpack_path + "/ch/environment"
      with open(env_path, "wt") as fp:
         fp.writelines("%s=%s\n" % pair for pair in env.env.items())

   def str_(self):
      return "%s='%s'" % (self.key, self.value)


class I_env_equals(Env):
   "ENV of the form KEY=VALUE, where VALUE is a bare word or quoted string."

   def __init__(self, *args):
      super().__init__(*args)
      self.key = ch.tree_terminal(self.tree, "WORD", 0)
      raw = ch.tree_terminal(self.tree, "WORD", 1)
      if (raw is None):
         # No second bare word, so the value is the quoted string.
         raw = unescape(ch.tree_terminal(self.tree, "STRING_QUOTED"))
      self.value = variables_sub(raw, env.env_build)


class I_env_space(Env):
   "ENV of the form KEY VALUE, where VALUE is the rest of the line."

   def __init__(self, *args):
      super().__init__(*args)
      self.key = ch.tree_terminal(self.tree, "WORD")
      line = ch.tree_terminal(self.tree, "LINE")
      # unescape() insists on a quoted literal, so wrap values that do not
      # already start with a quote.
      quoted = line if line.startswith('"') else '"%s"' % line
      self.value = variables_sub(unescape(quoted), env.env_build)


class I_from_(Instruction):
   """FROM instruction: start the image being built as a copy of the base
   image, pulling the base first if it is not already unpacked."""

   def __init__(self, *args):
      super().__init__(*args)
      ref_tree = ch.tree_child(self.tree, "image_ref")
      self.base_ref = ch.Image_Ref(ref_tree)
      # The base image shares the download cache and unpack directory of the
      # image being built.
      self.base_image = ch.Image(self.base_ref, image.download_cache,
                                 image.unpack_dir)
      self.alias = ch.tree_child_terminal(self.tree, "from_alias",
                                          "IR_PATH_COMPONENT")

   def str_(self):
      if (self.alias):
         alias = "AS %s" % self.alias
      else:
         alias = ""
      return str(self.base_ref) + " " + alias

   def execute_(self):
      # Complain about unsupported stuff.
      platform = self.options.pop("platform", False)
      if (platform):
         self.unsupported_yet_fatal("--platform", 778)
      # Any remaining options are invalid.
      self.options_assert_empty()
      # Other error checking.
      if (str(self.base_ref) == str(image.ref)):
         ch.FATAL("output image ref same as FROM: %s" % self.base_ref)
      base_path = self.base_image.unpack_path
      if (not os.path.isdir(base_path)):
         ch.DEBUG("image not found, pulling: %s" % base_path)
         self.base_image.pull_to_unpacked(fixup=True)
      image.copy_unpacked(self.base_image)
      # Build state starts over for each FROM.
      env.reset()


class Run(Instruction):
   """Common code for the RUN forms. Subclasses set self.cmd, the argument
   list to run inside the image."""

   def str_(self):
      return str(self.cmd)

   def execute_(self):
      rootfs = image.unpack_path
      for path in ("/etc/resolv.conf", "/etc/hosts"):
         ch.file_ensure_exists(rootfs + path)
      # Run the command in the image with ch-run, writable, as UID/GID 0,
      # starting in the current WORKDIR.
      ch_run = [CH_RUN, "-w", "--no-home", "--no-passwd",
                "--cd", env.workdir, "--uid=0", "--gid=0",
                rootfs, "--"]
      ch.cmd(ch_run + self.cmd, env=env.env_build)


class I_run_exec(Run):
   "RUN in exec form: a list of quoted strings, used as the argument list."

   def __init__(self, *args):
      super().__init__(*args)
      self.cmd = list()
      for word in ch.tree_terminals(self.tree, "STRING_QUOTED"):
         self.cmd.append(variables_sub(unescape(word), env.env_build))


class I_run_shell(Run):
   "RUN in shell form: the rest of the line is passed to /bin/sh -c."

   def __init__(self, *args):
      super().__init__(*args)
      line = ch.tree_terminal(self.tree, "LINE")
      # FIXME: Can't figure out how to remove continuations at parse time.
      joined = line.replace("\\\n", "")
      self.cmd = ["/bin/sh", "-c", joined]


class I_workdir(Instruction):
   """WORKDIR instruction: change the build's working directory and make
   sure that directory exists in the image."""

   def __init__(self, *args):
      super().__init__(*args)
      raw = ch.tree_terminal(self.tree, "LINE")
      self.path = variables_sub(raw, env.env_build)

   def execute_(self):
      env.chdir(self.path)
      # env.workdir is the resulting path inside the image.
      in_image = image.unpack_path + env.workdir
      ch.mkdirs(in_image)

   def str_(self):
      return self.path


class I_uns_forever(Instruction_Supported_Never):
   """Instruction that will never be supported; reported under the name
   that appeared in the Dockerfile."""

   def __init__(self, *args):
      super().__init__(*args)
      # Instruction keyword as written, taken from the UNS_FOREVER terminal.
      self.name = ch.tree_terminal(self.tree, "UNS_FOREVER")

   def str_name(self):
      # The class name does not encode the instruction, so use the keyword.
      return self.name


class I_uns_yet(Instruction):
   """Instruction that is not supported yet: announcing it warns with the
   tracking issue number, executing it does nothing, and it does not count
   toward the number of instructions."""

   execute_increment = 0

   def __init__(self, *args):
      super().__init__(*args)
      self.name = ch.tree_terminal(self.tree, "UNS_YET")
      # Tracking issue for each not-yet-supported instruction.
      issues = { "ADD":         782,
                 "CMD":         780,
                 "ENTRYPOINT":  780,
                 "LABEL":       781,
                 "ONBUILD":     788,
                 "SHELL":       789 }
      self.issue_no = issues[self.name]

   def str_name(self):
      return self.name

   def str_(self):
      return "(unsupported)"

   def announce(self):
      self.unsupported_yet_warn("instruction", self.issue_no)

   def execute_(self):
      return None


## Supporting classes ##

class CLI_Print_Storage(ch.CLI_Action_Exit):
   "Command line action: print the storage directory, then exit successfully."

   def __call__(self, _, namespace, *args, **kwargs):
      storage = namespace.storage
      print(storage)
      sys.exit(0)

class CLI_List_Images(ch.CLI_Action_Exit):
   """Command line action: print an image reference for each entry in the
   storage directory's img/ subdirectory, then exit successfully."""

   def __call__(self, _, namespace, *args, **kwargs):
      ch.dependencies_check()
      img_dir = namespace.storage + '/img/'
      for name in os.listdir(img_dir):
         print(ch.Image_Ref(name))
      sys.exit(0)


class Environment:
   "The state we are in: environment variables, working directory, etc."

   def __init__(self):
      self.reset()

   @property
   def env_build(self):
      "ARG and ENV variables merged into a new dict; ENV wins on conflict."
      return { **self.arg, **self.env }

   def chdir(self, path):
      """Change the working directory to path. An absolute path replaces the
      current directory; a relative one is joined onto it."""
      if (path.startswith("/")):
         self.workdir = path
      else:
         # Join rather than concatenate so we don't double the separator,
         # e.g. "/" + "foo" must give "/foo", not "//foo" (POSIX leaves the
         # meaning of a leading "//" implementation-defined).
         self.workdir = os.path.join(self.workdir, path)

   def reset(self):
      "Return to the initial state: working directory / and default variables."
      self.workdir = "/"
      # Defaults with value None (e.g., proxy variables that are not set)
      # are left out.
      self.arg = { k: v for (k, v) in ARG_DEFAULTS.items() if v is not None }
      self.env = { k: v for (k, v) in ENV_DEFAULTS.items() if v is not None }


## Supporting functions ###

def variables_sub(s, variables):
   """Return s with references to the given variables replaced.

   Both $NAME and ${NAME} are substituted with variables[NAME], unless the
   dollar sign is preceded by a backslash. References to names not in
   variables are left alone. Modifiers (${foo:+bar}, ${foo:-bar}) in s are a
   fatal error."""
   # FIXME: This should go in the grammar rather than being a regex kludge.
   #
   # Dockerfile spec does not say what to do if substituting a value that's
   # not set. We ignore those substitutions. This is probably wrong (the shell
   # substitutes the empty string).
   #
   # FIXME: remove when issue #774 is fixed
   if (re.search(r"(?<!\\)\${.+?:[+-].+?}", s) is not None):
      ch.FATAL("modifiers ${foo:+bar} and ${foo:-bar} not yet supported (issue #774)")
   for (k, v) in variables.items():
      name = re.escape(k)
      # Braces must be balanced, and an unbraced name must end at a
      # non-identifier character so that $FOO does not eat the front of
      # $FOOBAR.
      pattern = r"(?<!\\)\$(?:\{%s\}|%s(?![A-Za-z0-9_]))" % (name, name)
      # Use a function as the replacement so the value is inserted verbatim;
      # a replacement string would have its backslash escapes processed.
      s = re.sub(pattern, lambda m, v=v: v, s)
   return s

def unescape(sl):
   """Return the value of the double-quoted string literal sl, with escape
   sequences interpreted. Fatal error if sl is not wrapped in double
   quotes."""
   # FIXME: This is also ugly and should go in the grammar.
   #
   # The Dockerfile spec does not precisely define string escaping, but I'm
   # guessing it's the Go rules. You will note that we are using Python rules.
   # This is wrong but close enough for now (see also gripe in previous
   # paragraph).
   quoted = sl.startswith('"') and sl.endswith('"')
   if (not quoted):
      ch.FATAL("string literal not quoted")
   value = ast.literal_eval(sl)
   return value


## Bootstrap ##

# Run the builder only when this file is executed as a script, not when it
# is imported as a module.
if (__name__ == "__main__"):
   main()
