#!/bin/sh

# Usage: <this script> CHR < records > snps
#
# Reads dbSNP-style flat records on stdin, one record per line with
# " | "-separated fields (as required by the patterns below), and writes one
# line per retained SNP on stdout:
#
#   <rs id> <alleles> <het> <se(het)> <chr> <chr-pos> <orient>
#
# CHR is the chromosome name to select; it is inserted as-is into the
# regular expressions below.

set -e  # Exit immediately if a simple command exits with a non-zero status

# $rs_line must be the same as in the prechecking.sh script:
rs_line_fields2to7="\| human \| 9606 \| snp \| [^|]* \| [^|]* \| [^|]*"
rs_line="^rs[^|]* $rs_line_fields2to7"
# Used with plain grep (basic regex): '|' is literal there, only the dot of
# the assembly name needs escaping.
CTG_assembly="| CTG | assembly=GRCh37\\.p5 |"

# Filters the records read from stdin down to the strict SNPs located on
# chromosome $1 of GRCh37.p5 and reformats them (see the header above).
# The exit status is the one of the final sed, so "no record kept" is not an
# error. Lines that pass every grep but do not match the sed pattern are
# printed unchanged.
extract_chr_snps() {
  if [ -z "${1-}" ]; then
    # Diagnostic only: stdout and exit status stay exactly as they were for
    # existing callers, but the empty result is no longer silent.
    printf '%s\n' "${0##*/}: warning: no chromosome given as first argument; records are matched against an empty chromosome name" >&2
  fi

  # We keep strict SNPs only (tagged "snp") i.e. we drop lines tagged as
  # "in-del", "mixed", "microsatellite", "named-locus" or
  # "multinucleotide-polymorphism"
  grep -E "$rs_line \|" |
    grep '| notwithdrawn |' |
    # Filtering out SNPs with multiple reported locations on GRCh37.p5
    grep -v "${CTG_assembly}.*${CTG_assembly}" |
    grep "${CTG_assembly} chr=${1-} |" |
    # Filtering out SNPs with an unspecified location on GRCh37.p5
    # ('?' is a literal character in a basic regex)
    grep -v "${CTG_assembly} chr=${1-} | chr-pos=? |" |
    # Capture groups: 1=rs id, 2=fields before SNP, 3=alleles, 4=het,
    # 5=se(het), 6=fields before the GRCh37.p5 CTG entry, 7=chr, 8=chr-pos,
    # 9=orient. The four uncaptured fields between chr-pos and orient are
    # skipped. orient is captured up to the first blank or '|', so that no
    # trailing space is emitted when further fields follow it.
    sed -E "s/^(rs[^|]*) (\||\|.*\|) SNP \| alleles='([^|]*)' \| het=([^|]*) \| se\(het\)=([^|]*) (\||\|.*\|) CTG \| assembly=GRCh37\.p5 \| chr=(${1-}) \| chr-pos=([^|]*) \| [^|]* \| [^|]* \| [^|]* \| [^|]* \| orient=([^| ]*).*$/\1 \3 \4 \5 \7 \8 \9/"
}

extract_chr_snps "$@"

