#!/usr/bin/python3.6
#
# Manage adding and removing CVMFS replicas based on a configuration
#  file and the repositories list from stratum 0s or other stratum 1s
# Written by Dave Dykstra, February 2017

from optparse import OptionParser
import urllib.request, urllib.error, urllib.parse
import sys
import os
import time
import json
import subprocess
import fnmatch

# Program name, used in the usage text and as the prefix of fatal errors
prog = 'manage-replicas'
# Default command templates; the 'addcmd' and 'remcmd' config keywords
#  replace them.  @fqrn@, @url@ and @keypath@ are substituted before
#  the command is run.
addcmd = 'add-repository @fqrn@ @url@'
remcmd = 'remove-repository -f @fqrn@'
# NOTE(review): replist is not read anywhere in the visible code; the
#  'replist' config keyword fills replicalist instead
replist = ''
# Most recent values of the 'source', 'keypath' and 'keysource' config
#  keywords; a later 'repos' line captures whatever is current
source = ''
keypath = ''
keysource = ''
# fnmatch patterns of repository names to skip; every processed 'repos'
#  entry is appended too so later entries cannot match it again
excludes = []
# names under /etc/cvmfs/repositories.d that contain a replica.conf
oldrepos = []
# repository name -> [source, keypath, keysource, addcmd, remcmd]
repospecs = {}
# repository names read from the most recent 'replist' url
replicalist = []

# Command line definition.  Every entry of the table below is
#  (short flag, long flag, keyword settings) and the entries are
#  registered with the parser in the order listed.
usagestr = "\n".join(["Usage: %prog [-h ] [ other_options ]",
                      "    Use -h to see help"])
parser = OptionParser(usage=usagestr, prog=prog)
_option_table = (
  ("-f", "--config",
   dict(metavar="file", default="/etc/cvmfs/manage-replicas.conf",
        help="use file as configuration file. Default value: /etc/cvmfs/manage-replicas.conf")),
  ("-n", "--dry-run",
   dict(action="store_true", default=False,
        help="Show what would be done, don't actually do it")),
  ("-l", "--list",
   dict(action="store_true", default=False,
        help="Show full source url of all repos and exit")),
  ("-r", "--remove",
   dict(action="store_true", default=False,
        help="Remove repositories that don't belong, don't just warn about them")),
  ("-c", "--continue-failed",
   dict(action="store_true", default=False,
        help="Continue failed initial snapshots, replacing url with 'continue'")),
  ("-d", "--repo-directory",
   dict(metavar="dir", default='/srv/cvmfs',
        help="Directory to find repositories in (for -c option)")),
  ("-k", "--only-download-keys",
   dict(action="store_true", default=False,
        help="Only download any missing keys, do not add replicas")),
)
for _shortflag, _longflag, _settings in _option_table:
  parser.add_option(_shortflag, _longflag, **_settings)
# the whole of sys.argv is handed over, so the program name ends up
#  as the first element of args
options, args = parser.parse_args(sys.argv)

def logmsg(msg):
  """Print msg on stdout prefixed by the current local time.

  The output is flushed right away so that log lines appear in order
  with the output of commands that are run afterwards.
  """
  stamp = time.asctime()
  print(stamp + ' ' + msg, flush=True)

def fatal(msg, code=1):
  """Report msg on stderr, prefixed by the program name, and exit.

  code is the exit status handed to the interpreter (default 1).
  """
  sys.stderr.write(prog + ": " + msg + "\n")
  raise SystemExit(code)

def efatal(msg, e, code=1):
  """Exit through fatal() with msg followed by the exception's
  class name and text, separated by ': '."""
  detail = ': '.join([msg, type(e).__name__, str(e)])
  fatal(detail, code)

# read the config file
# All lines are read up front; they are parsed further below.  The
#  with block closes the file even when reading it fails part way.
try:
  with open(options.config, 'r') as fd:
    lines = fd.readlines()
except Exception as e:
  efatal('could not open config file', e)

#
# line counter and error helper used while going through the
#  config file lines
#
# number of the config file line being processed, for error messages
linenum = 0

def configfatal(msg):
  """Exit through fatal() with msg prefixed by the config file name
  and the current value of linenum."""
  where = ''.join([options.config, ' line ', str(linenum), ': '])
  fatal(where + msg)

# -l output is meant to be just the list of urls, so no log line then
if not options.list:
  logmsg('Starting')

# find out existing repository names
# Only entries that hold a replica.conf count as existing replicas.
# A missing or unreadable directory is reported the same way as other
#  fatal errors rather than as a python traceback.
repodir = '/etc/cvmfs/repositories.d'
try:
  reponames = os.listdir(repodir)
except OSError as e:
  efatal('could not list ' + repodir, e)
for reponame in reponames:
  if os.path.exists(repodir + '/' + reponame + '/replica.conf'):
    oldrepos.append(reponame)

def excluded(repo):
  """Return True when repo matches at least one of the fnmatch
  patterns currently held in excludes, otherwise False."""
  return any(fnmatch.fnmatch(repo, pattern) for pattern in excludes)

# go through the config file
# Each line is a keyword followed by its value.  Keywords take effect
#  in file order: a 'repos' line records the source, keypath, keysource,
#  addcmd and remcmd values that are current when it is reached, and
#  wildcards in it are matched against the most recent replist.
for linenum, line in enumerate(lines, 1):
  # remove comments and trailing whitespace
  line = line.split('#', 1)[0].rstrip()
  if line == "":
    continue
  fields = line.split(None, 1)
  if len(fields) < 2:
    # a keyword without a value cannot be unpacked below
    configfatal('missing value for keyword: ' + fields[0])
  (key, value) = fields
  if key == 'addcmd':
    addcmd = value
  elif key == 'remcmd':
    remcmd = value
  elif key == 'replist':
    # Extracting the names is done inside the try as well, so that a
    #  document without the expected keys is reported as a decoding
    #  failure and not as a traceback
    try:
      with urllib.request.urlopen(value) as response:
        reps = json.loads(response.read().decode())
      replicalist = [rep['name']
                     for typ in ['replicas', 'repositories']
                     for rep in reps[typ]]
    except Exception as e:
      efatal('failure reading and/or decoding ' + value, e)
  elif key == 'source':
    source = value
  elif key == 'keypath':
    keypath = value
  elif key == 'keysource':
    keysource = value
  elif key == 'exclude':
    excludes.extend(value.split())
  elif key == 'repos':
    if source == '':
      configfatal('No source specified before repos')
    for repo in value.split():
      if any(wildchar in repo for wildchar in '*?[]'):
        # match against a replist
        if not replicalist:
          configfatal('No replist specified before repos')
        for replica in replicalist:
          if fnmatch.fnmatch(replica, repo) and not excluded(replica):
            repospecs[replica] = [source, keypath, keysource, addcmd, remcmd]
        if not options.list:
          # look for extra repositories matching this wildcard
          for oldrepo in oldrepos:
            if oldrepo in repospecs or excluded(oldrepo):
              continue
            if not fnmatch.fnmatch(oldrepo, repo):
              continue
            if not options.remove:
              logmsg('WARNING: extra repository ' + oldrepo + ' matches managed repos ' + repo)
              continue
            cmd = remcmd.replace('@fqrn@', oldrepo)
            logmsg('Running ' + cmd)
            if not options.dry_run:
              # remcmd is a configured shell command line
              code = os.system(cmd)
              if code != 0:
                logmsg('Remove failed with exit code ' + hex(code))
      elif not excluded(repo):
        # no wildcards, just see if it has to be excluded
        #  although that's rather unlikely
        repospecs[repo] = [source, keypath, keysource, addcmd, remcmd]
      # exclude this repo pattern from future matches
      excludes.append(repo)
  else:
    configfatal('unrecognized keyword: ' + key)

# With -l, print the full source url of every selected repository in
#  order of repository name and stop before anything gets changed
if options.list:
  for repo in sorted(repospecs):
    print(repospecs[repo][0] + '/cvmfs/' + repo)
  sys.exit(0)

# download a file from github, following symlinks
def download_github_file(infile, outfile):
  """Fetch infile from raw.githubusercontent.com and store it in outfile.

  infile is the path that follows https://raw.githubusercontent.com/ in
  the url.  Content that contains no newline is taken to be the target
  of a symlink, relative to the directory of infile, and that target is
  downloaded instead.

  Returns True when outfile was written and False otherwise.  A failure
  of the first fetch is not logged because callers use it to probe for
  files that may not exist; later failures are logged.
  """
  baseurl = 'https://raw.githubusercontent.com/'
  # The transfer is left to curl.  It is run from an argument list, not
  #  through a shell, so the paths are passed through literally.
  curl = ['curl', '-m', '10', '-f', '-s']
  try:
    answer = subprocess.check_output(curl + [baseurl + infile])
  except (subprocess.CalledProcessError, OSError):
    return False
  try:
    answer = answer.decode()
  except UnicodeDecodeError:
    logmsg('Error decoding contents of ' + infile)
    return False
  if '\n' in answer:
    try:
      with open(outfile, 'w') as fd:
        fd.write(answer)
    except (OSError, UnicodeError):
      logmsg('Error writing ' + outfile)
      return False
  elif answer == '':
    # nothing to write and no link target to follow either
    logmsg('Error: empty contents for ' + outfile + ' from ' + infile)
    return False
  else:
    # does not have a newline, follow a symlink
    infile = os.path.normpath(os.path.dirname(infile) + '/' + answer)
    try:
      code = subprocess.call(curl + ['-o', outfile, baseurl + infile])
    except OSError:
      code = -1
    if code != 0:
      logmsg('Error following symlink for ' + outfile + ' to ' + infile)
      return False

  logmsg('Downloaded ' + outfile)
  return True

def _list_github_keys(keysource, domain):
  """Return the file names in the domain directory below keysource.

  keysource is split as owner/repository/ref/path and the listing is
  requested from the github contents api.  Any failure gives an empty
  list; the caller then logs that no key was found.
  """
  kss = keysource.split('/', 3)
  if len(kss) < 4:
    # not enough components to build the api url
    return []
  # NOTE: this api is only allowed to be used 60 times
  #   per hour per IP address
  apiurl = 'https://api.github.com/repos/' + \
            kss[0] + '/' + kss[1] + '/contents/' + kss[3] + \
            '/' + domain + '?ref=' + kss[2]
  try:
    answer = subprocess.check_output(
        ['curl', '-m', '10', '-f', '-s',
         '-H', 'Accept: application/vnd.github.v3+json', apiurl])
  except (subprocess.CalledProcessError, OSError):
    return []
  try:
    arr = json.loads(answer.decode())
  except ValueError:
    return []
  if not isinstance(arr, list):
    # only a list of records describes a directory of keys
    return []
  return [rec['name'] for rec in arr
          if isinstance(rec, dict) and 'name' in rec]

def _replace_stratum0_url(contents, fullurl):
  """Return contents with the CVMFS_STRATUM0 url replaced by fullurl.

  Returns None when there is no CVMFS_STRATUM0 setting that can be
  edited or when its value already starts with fullurl.
  """
  marker = 'CVMFS_STRATUM0='
  start = contents.find(marker)
  if start < 0:
    return None
  start += len(marker)
  end = contents.find('\n', start)
  if end < 0:
    # the setting is on a last line that has no newline
    end = len(contents)
  if contents.find('{', start, end) >= 0:
    #
    # look past through the hyphen sign inside the curly brackets
    # this is a convention in the cvmfs-hastratum1 package
    start = contents.find('-', start, end)
    if start < 0:
      return None
    start += 1
  if fullurl == contents[start:start+len(fullurl)]:
    return None
  # the url stops at a closing curly bracket, else at the end of line
  closing = contents.find('}', start, end)
  if closing >= 0:
    end = closing
  return contents[:start] + fullurl + contents[end:]

# Go through the selected repositories in name order: download missing
#  domain keys, correct the url of existing replicas and add new ones
prevdomains = set()
for repo, value in sorted(repospecs.items()):
  (url, keypath, keysource, addcmd, remcmd) = value
  # partition leaves domain empty for a name without a dot
  domain = repo.partition('.')[2]
  if len(keysource) > 0 and domain == '':
    logmsg('WARNING: no domain in repository name ' + repo + ', not looking for keys')
  elif len(keysource) > 0 and domain not in prevdomains:
    # this is fairly complicated so only check once per domain
    prevdomains.add(domain)
    # first make sure domain key exists
    keydir = '/etc/cvmfs/keys'
    if options.dry_run:
      keydir = '/tmp'
    if not os.path.isfile(keydir + '/' + domain + '.pub') and \
        not os.path.isdir(keydir + '/' + domain):
      pubpath = '/' + domain + '.pub'
      if not download_github_file(keysource + pubpath, keydir + pubpath):
        # use github api to list keys in a keydir
        keysindir = _list_github_keys(keysource, domain)
        pubdir = '/' + domain
        if len(keysindir) > 0:
          os.mkdir(keydir + pubdir)
        else:
          logmsg('No pub key found for ' + domain)
        for key in keysindir:
          pubpath = pubdir + '/' + key
          download_github_file(keysource + pubpath, keydir + pubpath)
  if options.only_download_keys:
    continue
  serverconf = '/etc/cvmfs/repositories.d/' + repo + '/server.conf'
  replicaconf = '/etc/cvmfs/repositories.d/' + repo + '/replica.conf'
  addrepo = True
  if os.path.exists(serverconf):
    addrepo = False
    # Repo exists.  If there's no replica.conf, it may
    #   be blanked so skip that.
    # If replica.conf exists and the full url does not
    #   match what is in server.conf, edit server.conf.
    fullurl = url + '/cvmfs/' + repo
    if os.path.exists(replicaconf):
      with open(serverconf) as fd:
        contents = fd.read()
      newcontents = _replace_stratum0_url(contents, fullurl)
      if newcontents is not None:
        logmsg('Setting new url for ' + repo + ': ' + fullurl)
        if not options.dry_run:
          with open(serverconf, 'w') as fd:
            fd.write(newcontents)
      # A replica with no completed snapshot and no lock file of a
      #  running snapshot or update is added again with 'continue'
      #  in place of the url
      if options.continue_failed and \
          not os.path.exists(options.repo_directory +'/' + repo + '/.cvmfs_last_snapshot') and \
          not os.path.exists('/var/spool/cvmfs/' + repo + '/is_snapshotting.lock') and \
          not os.path.exists('/var/spool/cvmfs/' + repo + '/is_updating.lock'):
        addrepo = True
        url = 'continue'
  if addrepo:
    cmd = addcmd.replace('@url@', url).replace('@fqrn@', repo).replace('@keypath@', keypath)
    logmsg('Running ' + cmd)
    if not options.dry_run:
      # addcmd and remcmd are configured shell command lines
      code = os.system(cmd)
      if code != 0:
        logmsg('Add failed with exit code ' + hex(code))
        if not options.continue_failed:
          # undo the partial add so that it is tried afresh next time
          cmd = remcmd.replace('@fqrn@',repo)
          logmsg('Running ' + cmd)
          code = os.system(cmd)
          if code != 0:
            logmsg('Undo failed with exit code ' + hex(code))

logmsg('Finished')
