#! /bin/bash
# Generate navigational pages for DocServ.
#
# Mandatory parameters:
#   --stitched-config="/path/to/file.xml"    # Full docservconfig file
#                                              (positive version)
#   --template-dir="/path/to/templates"
#   --cache-dir="/var/cache/docserv/target"  # Document metadata cache directory
#                                              as generated by docserv script
#   --output-dir="/path/to/output"           # Where to output HTML files
#   --ui-languages="en-us de-de"             # Languages that are supported
#                                              by the UI templates
#   --default-ui-language="en-us"            # Default language of UI
#                                              translations, used to find
#                                              fallback translations.
#   --site-sections="site1 site2"            # Site sections that are supported
#   --default-site-section="site1"           # Default site section, also used
#                                              as the fallback
#   --omit-lang-path="en-us"                 # Allows omitting the path
#                                              component to the default page
#                                              language
#   --internal-mode                          # Enable features that are not
#                                              supposed to be shown publicly
#   --product                                # Product to build UI for
#   --docset                                 # Docset to build UI for
#   --base-path                              # Relative path to the root of the
#                                              docserv2 directory on the host
#
# Optional parameters:
#   --fragment-dir                           # Directory for translatable SSI
#                                              fragments
#   --fragment-l10n-dir                      # Directory path for fragment
#                                              translations (mandatory if
#                                              previous parameter is set)
#   --help                                   # Show this help screen


# Report a fatal error and stop the script.
#   $1 - message text; backslash escapes such as \n are expanded (echo -e)
# Outputs: the message on stderr
# Exits:   always, with status 1
out() {
  local message="$1"
  echo -e "$message" >&2
  exit 1
}


# Canonical path of this script and of the directory it lives in.
# realpath already resolves every symlink on the way, so its result can
# never be a symlink itself and no extra readlink round trip is needed.
# Everything is quoted so that install paths containing whitespace work.
me=$(realpath -- "$0")
mydir=$(dirname -- "$me")

# Load the definitions kept in the docserv-dirs file next to this script.
# NOTE(review): presumably this is where share_dir and readme_message (used
# further down, not defined in this file) come from — confirm.
source "$mydir/docserv-dirs"

# Print the help screen and exit.
#
# The help text is the comment block at the top of the script file: every
# line after the first line containing "#!" up to and including the first
# blank line. The leading "#" (plus one optional space) is stripped from each
# line and the first literal "$0" on a line is replaced by the base name the
# script was invoked with.
# Globals: me (read) - path of the script file the text is extracted from
# Outputs: help text on stdout
app_help() {
  local invoked_as
  invoked_as=$(basename -- "$0")
  sed -rn '/#!/{n; p; :loop n; p; /^[ \t]*$/q; b loop}' "$me" \
    | sed -r -e 's/^# ?//' -e "s/\\\$0/${invoked_as}/"
  exit
}

# Names of the external programs this script calls.
xsltproc='xsltproc'

xmllint='xmllint'

jing='jing'
# JVM stack size option; a value already present in $stacksize wins.
stacksize=${stacksize:-"-Xss4096K"}
# Selects the Xerces parser configuration with XInclude support; handed to
# jing via ADDITIONAL_FLAGS/ADDITIONAL_OPTIONS when validating fragment
# localization files.
java_flags="-Dorg.apache.xerces.xni.parser.XMLParserConfiguration=org.apache.xerces.parsers.XIncludeParserConfiguration"

starlet='xmlstarlet'

# Bundled resources below $share_dir/build-navigation.
# NOTE(review): share_dir is not set in this file; presumably it comes from
# the sourced docserv-dirs — confirm.
fragment_l10n_schema=$share_dir/build-navigation/fragment-l10n-schema.rnc
fragment_stylesheet=$share_dir/build-navigation/generate-localized-fragments.xsl

all_stylesheet=$share_dir/build-navigation/list-all-products.xsl
related_stylesheet=$share_dir/build-navigation/list-related-products.xsl

stylesheet=$share_dir/build-navigation/build-navigation-json.xsl
docserv_js=$share_dir/build-navigation/web-resources/docservui.js

# Abort early when a bundled resource is missing. The list is quoted so that
# a share directory containing whitespace is not split into several words.
for dependency in "$fragment_l10n_schema" "$fragment_stylesheet" \
  "$all_stylesheet" "$related_stylesheet" "$stylesheet" \
  "$docserv_js"; do
  [[ ! -f "$dependency" ]] && out "File $dependency does not exist.$(readme_message)"
done

# Parameter values, filled in by the argument parser below. Every variable
# the parser may set is reset here first, so that a variable of the same
# name inherited from the environment cannot be mistaken for a parameter.
stitched_config=

ui_languages=
default_ui_language=
site_sections=
default_site_section=
omit_lang_path=
relevant_product=
relevant_docset=

template_dir=
cache_dir=
output_dir=
base_path=
internal_mode='false'

# Fragment processing is switched on by passing --fragment-dir.
enable_ssi_fragments=0
fragment_dir=
fragment_l10n_dir=

# Unrecognized parameters, collected as "  <param>\n" entries. The "\n" is
# stored literally and only expanded when the list is printed.
unknown=

# Parameters use the form --name=value; "${i#*=}" keeps everything after the
# first "=", so values may themselves contain "=".
for i in "$@"; do
  case "$i" in
    -h|--help)
      app_help
    ;;
    --internal-mode)
      internal_mode='true'
    ;;
    --product=*)
      relevant_product="${i#*=}"
    ;;
    --docset=*)
      relevant_docset="${i#*=}"
    ;;
    --default-ui-language=*)
      default_ui_language="${i#*=}"
    ;;
    --ui-languages=*)
      ui_languages="${i#*=}"
    ;;
    --site-sections=*)
      site_sections="${i#*=}"
    ;;
    --default-site-section=*)
      default_site_section="${i#*=}"
    ;;
    --omit-lang-path=*)
      omit_lang_path="${i#*=}"
    ;;
    --stitched-config=*)
      stitched_config="${i#*=}"
    ;;
    --template-dir=*)
      template_dir="${i#*=}"
    ;;
    --cache-dir=*)
      cache_dir="${i#*=}"
    ;;
    --output-dir=*)
      output_dir="${i#*=}"
    ;;
    --base-path=*)
      base_path="${i#*=}"
    ;;
    --fragment-dir=*)
      enable_ssi_fragments=1
      fragment_dir="${i#*=}"
    ;;
    --fragment-l10n-dir=*)
      fragment_l10n_dir="${i#*=}"
    ;;
    *)
      unknown+="  $i\n"
    ;;
  esac
done

# Refuse to continue if the argument parser collected anything it did not
# recognize.
[[ "$unknown" ]] && \
  out "There are unknown parameters:\n$unknown"

# --ui-languages must be a space-separated list of lower-case codes such as
# "en-us de-de": after removing one run of well-formed codes nothing may be
# left over. An empty list is rejected as well, because the space that is
# prepended for matching then survives.
[[ $(echo -e " $ui_languages" | sed -r 's/( [a-z]{2}(-[a-z]{2,5})?)+//') ]] && \
  out "Language parameter ($ui_languages) is not in the right format."

# Same format check for the single default language; the message reports the
# value that was actually checked.
# NOTE(review): an empty default language passes this check even though the
# parameter is documented as mandatory — confirm whether that is intended.
[[ $(echo -e "$default_ui_language" | sed -r 's/[a-z]{2}(-[a-z]{2,5})?//') ]] && \
  out "Default language parameter ($default_ui_language) is not in the right format."

[[ ! $(echo " $site_sections" | grep -oP '^( +[-_a-zA-Z0-9]+)+$') ]] && \
  out "Site sections parameter does not conform to scheme (must be alphanumeric-_, space-separated)."

[[ ! $(echo "$default_site_section" | grep -oP '^[-_a-zA-Z0-9]+$') ]] && \
  out "Default site section parameter does not conform to scheme (must be a single alphanumeric-_ value)."
default_site_section_valid=0

# Drop repeated site sections while keeping first-seen order. The list is
# kept space-delimited on both ends so " name " only matches whole names.
site_sections_deduped=' '
for site_section in $site_sections; do
  [[ ! $(echo "$site_sections_deduped" | grep -- " $site_section ") ]] && site_sections_deduped+="$site_section "
done
# Convert to one site section per line, without empty lines.
site_sections_deduped=$(echo -e "$site_sections_deduped" | tr ' ' '\n' | sed -n '/^$/!p')

# The default site section has to be one of the listed site sections.
for site_section in $site_sections_deduped; do
  [[ "$site_section" == "$default_site_section" ]] && { default_site_section_valid=1; break; }
done
site_sections_one_line=$(echo -e "$site_sections_deduped" | tr '\n' ' ' | sed -r 's/\s*$//')
[[ "$default_site_section_valid" -eq 1 ]] || out "Default site section parameter contains invalid site section reference ($default_site_section). (Valid site sections: $site_sections_one_line)"

# The stitched configuration must be an existing regular file.
for file in "$stitched_config"; do
  [[ ! -f "$file" ]] && out "File $file does not exist."
done

# Template and output directories must already exist.
for dir in "$template_dir" "$output_dir"; do
  [[ ! -d "$dir" ]] && out "Directory $dir does not exist."
done

# If this is the first build on a new instance and the first product built does
# not have a `<builddocs/>` section, we need to create the cache dir here.
# In essentially any other case, the cache dir should already exist.
# Failing to create it is fatal: continuing would build the navigation from
# an unusable cache location.
if [[ ! -d "$cache_dir" ]]; then
  mkdir -p -- "$cache_dir" || out "Directory $cache_dir could not be created."
fi

# FIXME: This kind of validation is interesting for CI too, but nestled in here,
# it's not very accessible to that sort of tooling. It would also be good to
# validate our templates _before_ starting to build anything, like we do with
# stitch for the product configuration.
if [[ "$enable_ssi_fragments" -eq 1 ]]; then
  for dir in "$fragment_dir" "$fragment_l10n_dir"; do
    [[ ! -d "$dir" ]] && out "Directory $dir does not exist."
  done
  # FIXME: there is no proper error mode here for when there are no fragments
  # but the option is enabled nonetheless.
  for file in "$fragment_dir/"*.fragment.html; do
    # Writing a schema that can take into account all possible XHTML 5
    # constructs, including possibly embedded SVGs etc. would be hard.
    # Leave that as an exercise for a later date.
    # Only well-formedness is checked. The diagnostics are captured
    # (including stderr) so they can be shown as part of the error message.
    wellformedcheck=$("$xmllint" --noout "$file" 2>&1) \
      || out "Fragment file $file is not well-formed XML:\n$wellformedcheck"
  done
  # The localization file of the default UI language is the fallback for all
  # other languages and therefore has to exist.
  fragment_l10n_fallback="$fragment_l10n_dir/$default_ui_language.xml"
  [[ ! -f "$fragment_l10n_fallback" ]] && out "File $fragment_l10n_fallback does not exist."
  for ui_language in $ui_languages; do
    l10n_file="$fragment_l10n_dir/$ui_language.xml"
    # If the file does not exist, we just use the fallback language file
    [[ ! -f "$l10n_file" ]] && continue
    # Validate the localization file against the fragment l10n schema.
    validate=$(2>&1 ADDITIONAL_FLAGS="$java_flags" ADDITIONAL_OPTIONS="$java_flags" \
        "$jing" -ci "$fragment_l10n_schema" "$l10n_file") \
      || out "Fragment localization file $l10n_file is not valid:\n$validate"
    # Every key attribute of a "content" element must occur only once:
    # compare the sorted key list with its de-duplicated form.
    l10n_keys=$("$starlet" sel -t -v '//*[local-name(.) = "content"]/@key' "$l10n_file" | sort)
    unique_l10n_keys=$(printf '%s\n' "$l10n_keys" | sort -u)
    if [[ "$l10n_keys" != "$unique_l10n_keys" ]]; then
      # Lines that only appear in the list with repetitions are the
      # surplus occurrences, i.e. the duplicated keys.
      duplicated_keys=$(comm -2 -3 <(printf '%s\n' "$l10n_keys") <(printf '%s\n' "$unique_l10n_keys") | tr '\n' ' ')
      out "Some fragment localization keys in $l10n_file are not unique: ${duplicated_keys}."
    fi
  done
fi

# All product/docset combinations known to the stitched configuration and the
# ones related to the requested product/docset (de-duplicated).
allproducts=$("$xsltproc" "$all_stylesheet" "$stitched_config")
relatedproducts=$("$xsltproc" \
  --stringparam product "$relevant_product" \
  --stringparam docset "$relevant_docset" \
  --stringparam internal-mode "$internal_mode" \
  "$related_stylesheet" "$stitched_config" | \
  sort -u)

# The requested combination must be listed verbatim as a whole line; a
# fixed-string match keeps characters such as "." from acting as wildcards.
if ! printf '%s\n' "$allproducts" | grep -qxF -- "${relevant_product}/${relevant_docset}"; then
  out "Either product $relevant_product or docset $relevant_docset does not exist."
fi

# The omitted language is used as a path prefix in the templates.
[[ -n "$omit_lang_path" ]] && omit_lang_path="${omit_lang_path}/"

# Canonicalize the directories.
template_dir=$(readlink -f -- "$template_dir")
cache_dir=$(readlink -f -- "$cache_dir")
output_dir=$(readlink -f -- "$output_dir")

# Pick the first matching template file, whatever its extension is.
template_default=$(ls "$template_dir/template-section-default."* 2>/dev/null | head -1)
template_product=$(ls "$template_dir/template-product."* 2>/dev/null | head -1)

template_resources=$template_dir/res

# The default section template, the product template and the resource
# directory are all required. A template that was not found is an empty
# string, so it is reported by the pattern that was searched for.
[[ -n "$template_default" ]] || out "File/directory $template_dir/template-section-default.* does not exist."
[[ -n "$template_product" ]] || out "File/directory $template_dir/template-product.* does not exist."
for file in "$template_default" "$template_product" "$template_resources"; do
  [[ ! -d "$file" && ! -f "$file" ]] && out "File/directory $file does not exist."
done

# CREATE DIRECTORY TREE FOR OUTPUT
# Scratch directory for intermediate files. It is removed on every exit
# path, including the early exits taken on errors.
temp_dir=$(mktemp -d /tmp/docserv-build-navigation-XXXXXXXX) \
  || out "Could not create a temporary directory."
trap 'rm -rf -- "$temp_dir"' EXIT

# Where to place the JSON data files
data_path='docserv/data'
# Where to place the template's resource files (JS, CSS, images)
res_path='docserv/res/'
# Where to place fragment files
fragment_path='docserv/fragments'


# Generate one localized copy of every fragment per UI language below
# $output_dir/$fragment_path/<language>/.
if [[ $enable_ssi_fragments -eq 1 ]]; then

  # Loop-invariant, so only determined once.
  current_year=$(date '+%Y')

  for ui_language in $ui_languages; do
    # Languages without their own localization file use the fallback file.
    fragment_l10n_file="$fragment_l10n_dir/$ui_language.xml"
    if [[ ! -f "$fragment_l10n_file" ]]; then
      fragment_l10n_file="$fragment_l10n_fallback"
    fi
    mkdir -p -- "$output_dir/$fragment_path/$ui_language"
    for fragment in "$fragment_dir"/*.fragment.html; do
      # An unmatched glob stays a literal pattern; there is nothing to do then.
      [[ -e "$fragment" ]] || continue
      localized_fragment="$output_dir/$fragment_path/$ui_language/$(basename -- "$fragment")"
      "$xsltproc" \
        --stringparam "l10n-file" "$fragment_l10n_file" \
        --stringparam "fallback-l10n-file" "$fragment_l10n_fallback" \
        "$fragment_stylesheet" \
        "$fragment" \
        > "$localized_fragment"
      # Strip trailing whitespace and fill in the placeholders, which may
      # appear either literally (@{{#name#}}) or percent-encoded.
      sed -ri \
        -e 's/\s*$//' \
        -e 's/@(\{\{|%7B%7B)#year#(}}|%7D%7D)/'"$current_year"'/g' \
        -e 's/@(\{\{|%7B%7B)#ui_language#(}}|%7D%7D)/'"$ui_language"'/g' \
          "$localized_fragment"
      # The "omissible" placeholders expand to nothing for the default UI
      # language and to the language code (optionally followed by a slash)
      # for every other language.
      if [[ "$ui_language" != "$default_ui_language" ]]; then
        sed -ri \
          -e 's/@(\{\{|%7B%7B)#ui_language_omissible#(}}|%7D%7D)/'"$ui_language"'/g' \
          -e 's/@(\{\{|%7B%7B)#ui_language_omissible_slash#(}}|%7D%7D)/'"$ui_language"'\//g' \
           "$localized_fragment"
      else
        sed -ri \
          -e 's/@(\{\{|%7B%7B)#ui_language_omissible#(}}|%7D%7D)//g' \
          -e 's/@(\{\{|%7B%7B)#ui_language_omissible_slash#(}}|%7D%7D)//g' \
            "$localized_fragment"
      fi
      # Remove empty lines.
      sed -ni '/^$/ !p' "$localized_fragment"
    done
  done
fi


# Create the data directory and the per-language page directory for every
# known product/docset.
for product in $allproducts; do
  mkdir -p -- "$output_dir/$data_path/$product"
  for lang in $ui_languages; do
    mkdir -p -- "$output_dir/$lang/$product"
  done
done

# Stitch the /document and /archive root elements of all cache files into a
# single <docservcache> document. The XML is written with printf and straight
# from xmlstarlet, so backslashes in the cached content are kept verbatim,
# and the file names are read NUL-delimited so that paths containing
# whitespace stay intact.
cache_file=$temp_dir/cache.xml
{
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>' '<docservcache>' ''
  while IFS= read -r -d '' -u 3 file; do
    "$starlet" sel -t -c "(/document|/archive)" "$file"
    printf '\n'
  done 3< <(find "$cache_dir" -name '*.xml' -print0)
  printf '\n%s\n' '</docservcache>'
} > "$cache_file"

# Produce the JSON data files: one stylesheet run for the requested
# product/docset, followed by one run for each related product/docset.
for product_docset in "${relevant_product}/${relevant_docset}" $relatedproducts; do

  # Split "product/docset" into its first and second "/"-separated field.
  this_product=$(cut -d'/' -f1 <<< "$product_docset")
  this_docset=$(cut -d'/' -f2 <<< "$product_docset")

  # Stylesheet parameters, collected up front to keep the call short.
  json_params=(
    --stringparam "output_root" "$output_dir/$data_path/"
    --stringparam "cache_file" "$cache_file"
    --stringparam "internal_mode" "$internal_mode"
    --stringparam "ui_languages" "$ui_languages"
    --stringparam "site_sections" "$site_sections_deduped"
    --stringparam "default_site_section" "$default_site_section"
    --stringparam "product" "$this_product"
    --stringparam "docset" "$this_docset"
  )

  xsltproc "${json_params[@]}" "$stylesheet" "$stitched_config"

done

# Clean up stray ',' characters that are extremely hard to avoid when
# generating JSON via XSLT.
# The file list is collected NUL-delimited before any file is rewritten, so
# paths containing whitespace are handled and the directory walk never sees
# the temporary ".0" files.
json_files=()
while IFS= read -r -d '' json_file; do
  json_files+=("$json_file")
done < <(find "$output_dir" -name '*.json' -print0)

for json_file in "${json_files[@]}"; do
  # 1. strip trailing whitespace from every line
  # 2. join all lines with \r so that a comma and the closing bracket it
  #    precedes can be matched even when they sit on different lines
  # 3. drop commas directly before ']' or '}', collapse empty lines and
  #    restore the newlines
  # 4. drop lines that are empty or whitespace-only
  sed -r -e 's/\s*$//' "$json_file" | \
    tr '\n' '\r' | \
    sed -r \
      -e 's/,(\s*|\r*)([]}])/\2/g' \
      -e 's/\r\r*/\r/g' \
      -e 's/\r/\n/g' | \
    sed -n '/^\s*$/ !p' \
      > "$json_file.0"
  mv -- "$json_file.0" "$json_file"
done

# Clean up & then copy images, CSS, & JS resources again
# ${output_dir:?} aborts instead of running "rm -rf" on a path below "/"
# should the output directory ever be empty or unset.
rm -rf -- "${output_dir:?}/$res_path"
mkdir -p -- "$output_dir/$res_path"
cp -- "$docserv_js" "$output_dir/$res_path"
cp -r -- "$template_resources"/* "$output_dir/$res_path"

# Render the navigational pages for every UI language: one page per site
# section and lifecycle, plus one page per product/docset.
for lang in $ui_languages; do

  mkdir -p -- "$output_dir/$lang"

  for site_section in $site_sections_deduped; do

    for lifecycle in supported unsupported; do

      # Prefer a template specific to this section and lifecycle; otherwise
      # fall back to the default section template.
      fallback_template=0
      template_current=$(ls "$template_dir/template-section.$site_section.$lifecycle."* 2>/dev/null | head -1)
      [[ ! -f "$template_current" ]] && { template_current="$template_default"; fallback_template=1; }
      # The output page gets the extension of its template (text after the
      # last "."), or "html" when that is empty.
      ext=${template_current##*.}
      [[ -n "$ext" ]] || ext=html

      # The supported page of the default site section becomes the index page.
      template_out="${site_section}-${lifecycle}.${ext}"
      if [[ "$site_section" == "$default_site_section" && "$lifecycle" == 'supported' ]]; then
        template_out="index.${ext}"
        echo "Setting $site_section/$lifecycle as default ($lang/${template_out})."
      fi
      [[ "$fallback_template" -eq 1 ]] && echo "Using fallback template $(basename -- "$template_current") for ${lang}/${template_out}."
      # Fill in the @{{#...#}} placeholders of the template.
      sed -r \
        -e 's%@\{\{#base_path#}}%'"${base_path}"'%g' \
        -e 's%@\{\{#base_path_res#}}%'"${base_path}${res_path}"'%g' \
        -e 's%@\{\{#page_role#}}%'"section"'%g' \
        -e 's%@\{\{#section_name#}}%'"${site_section}.${lifecycle}"'%g' \
        -e 's%@\{\{#template_extension#}}%'"$ext"'%g' \
        -e 's%@\{\{#ui_language#}}%'"$lang"'%g' \
        -e 's%@\{\{#omit_path_component#}}%'"$omit_lang_path"'%g' \
        "$template_current" \
        > "$output_dir/$lang/${template_out}"

    done

  done

  for product_docset in "${relevant_product}/${relevant_docset}" $relatedproducts; do

    # Split "product/docset" into its first and second "/"-separated field.
    this_product=$(cut -f1 -d'/' <<< "$product_docset")
    this_docset=$(cut -f2 -d'/' <<< "$product_docset")

    mkdir -p -- "$output_dir/$lang/$this_product/$this_docset"

    # NOTE(review): $ext still holds the extension of the last section
    # template processed above, not the extension of the product template —
    # confirm that this is intended.
    sed -r \
      -e 's%@\{\{#base_path#}}%'"${base_path}"'%g' \
      -e 's%@\{\{#base_path_res#}}%'"${base_path}${res_path}"'%g' \
      -e 's%@\{\{#page_role#}}%'"product"'%g' \
      -e 's%@\{\{#template_extension#}}%'"$ext"'%g' \
      -e 's%@\{\{#ui_language#}}%'"$lang"'%g' \
      -e 's%@\{\{#omit_path_component#}}%'"$omit_lang_path"'%g' \
      -e 's%@\{\{#product#}}%'"$this_product"'%g' \
      -e 's%@\{\{#docset#}}%'"$this_docset"'%g' \
      "$template_product" \
      > "$output_dir/$lang/$this_product/$this_docset/index.$ext"
  done

done

# Report where the result was written, then remove the scratch directory.
echo "-> $output_dir"

rm -rf -- "$temp_dir"
