#!/usr/bin/ruby.ruby3.4 
# encoding: UTF-8

require 'bundler/setup'

require 'image_optim'
require 'image_optim/cmd'
require 'progress'
require 'shellwords'
require 'gdbm'
require 'digest'
require 'erb'
require 'ostruct'

# Directory (relative to the current working directory) used for the cache
# database, stored worker results and generated reports; created on load
DIR = 'tmp'.freeze
Pathname.new(DIR).mkpath

Array.class_eval do
  # Treats every element as a list of possible values (a non array element
  # is a list of one value) and yields each combination of them, without
  # block returns an enumerator
  #
  #     [[1, 2], 3, [4, 5]].variants{ |v| p v }
  #     # [1, 3, 4]
  #     # [1, 3, 5]
  #     # [2, 3, 4]
  #     # [2, 3, 5]
  #
  # An empty array yields one empty combination. Returns self when block is
  # given.
  def variants(&block)
    return enum_for(:variants) unless block

    if empty?
      yield([])
    else
      lists = map{ |item| Array(item) }
      lists.first.product(*lists.drop(1), &block)
    end
    self
  end

  # Adds to initial all elements or, when block is given, results of calling
  # block with every element
  def sum(initial = 0, &block)
    return inject(initial, :+) unless block

    inject(initial){ |total, item| total + block.call(item) }
  end
end

Hash.class_eval do
  # Treats every value as a list of possible values (a non array value is a
  # list of one value) and yields a hash for each combination of them,
  # without block returns an enumerator
  #
  #     {:a => [1, 2], :b => 3, :c => [4, 5]}.variants{ |v| p v }
  #     # {:a=>1, :b=>3, :c=>4}
  #     # {:a=>1, :b=>3, :c=>5}
  #     # {:a=>2, :b=>3, :c=>4}
  #     # {:a=>2, :b=>3, :c=>5}
  #
  # An empty hash yields one empty hash. Returns self when block is given.
  def variants
    return enum_for(:variants) unless block_given?

    if empty?
      yield({})
    else
      names = keys
      # Combinations are built by Array#variants
      values.variants{ |combination| yield names.zip(combination).to_h }
    end
    self
  end
end

Process.times.class.class_eval do
  # Total of user and system CPU times of the process and of its children
  def sum
    [utime, stime, cutime, cstime].reduce(:+)
  end
end

ImageOptim::Path.class_eval do
  # Hex encoded SHA-256 of the file content, calculated on first call and
  # remembered for this path instance
  def digest
    return @digest if @digest

    @digest = Digest::SHA256.file(to_s).hexdigest
  end

  # Pair of modification time and content digest
  def etag
    [mtime, digest]
  end
end

# Analyse efficiency of workers
class Analyser
  Cmd = ImageOptim::Cmd
  HashHelpers = ImageOptim::HashHelpers

  # Persistent cache of values in a GDBM database
  #
  # Keys and entries are serialized using Marshal, every entry is stored
  # together with an etag and is returned only for an equal etag.
  class Cache
    DB = GDBM.new("#{DIR}/worker-analysis.db")

    class << self
      # Value stored for context and key if it was stored with given etag.
      # Otherwise, when block is given, its result is stored and returned,
      # without block nil is returned.
      def get(context, key, etag, &block)
        full_key = [context, key]
        cached = get!(full_key, etag)
        return cached if cached || !block

        set!(full_key, etag, &block)
      end

      # Store result of block for context and key with given etag replacing
      # any existing entry, returns stored value
      def set(context, key, etag, &block)
        set!([context, key], etag, &block)
      end

    private

      # Stored value if there is an entry and its etag is equal to given one
      def get!(key, etag)
        raw = DB[Marshal.dump(key)]
        return unless raw

        value, stored_etag = Marshal.load(raw)
        value if stored_etag == etag
      end

      # Run block, write its result with etag to database and return it
      def set!(key, etag, &_block)
        yield.tap do |value|
          DB[Marshal.dump(key)] = Marshal.dump([value, etag])
        end
      end
    end
  end

  # Delegate to worker with short id
  #
  # Wraps a worker instance created with given options and adds identifiers
  # used in reports, for caching and for building chains.
  class WorkerVariant < DelegateClass(ImageOptim::Worker)
    # name     - bin name followed by options as they were given
    # id       - bin name followed by options as returned by the worker
    # cons_id  - worker class with values of `allow_consecutive_on` methods
    # required - value of `required` option
    attr_reader :name, :id, :cons_id, :required

    # Options `required`, `run_order` and `allow_consecutive_on` are removed
    # from options (the hash is changed in place), everything left is passed
    # to the worker class
    def initialize(klass, image_optim, options)
      @required = options.delete(:required)
      @run_order = options.delete(:run_order)
      consecutive_keys = Array(options.delete(:allow_consecutive_on))
      @image_optim = image_optim

      bin_name = klass.bin_sym.to_s
      @name = bin_name + options_string(options)
      __setobj__(klass.new(image_optim, options))
      @id = bin_name + options_string(self.options)

      consecutive_values = consecutive_keys.map{ |key| [key, send(key)] }
      @cons_id = [klass, consecutive_values]
    end

    # Run order from options if it was specified, otherwise the one of worker
    def run_order
      @run_order || super
    end

    # Identifies cached results of this variant: changes with id, versions
    # of used bins and source of worker's optimize method
    def etag
      [id, bin_versions, source_digest]
    end

  private

    # Names with versions of bins used by worker
    def bin_versions
      @bin_versions ||= used_bins.map do |bin_name|
        bin = @image_optim.resolve_bin!(bin_name)
        "#{bin.name} #{bin.version}"
      end
    end

    # Hex SHA-256 of the file in which worker's optimize method is defined
    def source_digest
      @source_digest ||= begin
        source_file, _line = __getobj__.method(:optimize).source_location
        Digest::SHA256.file(source_file).hexdigest
      end
    end

    # Options sorted and formatted as `(key:value, ...)`, empty string if
    # there are no options
    def options_string(options)
      return '' if options.empty?

      pairs = options.sort.map{ |key, value| "#{key}:#{value.inspect}" }
      "(#{pairs.join(', ')})"
    end
  end

  # Result of running one worker on one image
  #
  # `cache` is the digest of resulting content (of source if worker didn't
  # succeed) with `/` inserted after first two characters, it is used as the
  # path of stored result relative to worker-analysis directory.
  StepResult = Struct.new(
    :worker_id,
    :success,
    :time,
    :src_size,
    :dst_size,
    :cache
  ) do
    # Run worker on src measuring process times (including times of
    # children), successful result is moved to the path of the result unless
    # such file already exists
    def self.run(src, worker)
      dst = src.temp_path
      started = Process.times.sum
      success = worker.optimize(src, dst)
      elapsed = Process.times.sum - started

      dst_size = dst.size if success
      content = success ? dst : src
      cache = content.digest.sub(/../, '\0/')
      result = new(worker.id, success, elapsed, src.size, dst_size, cache)
      return result unless success

      stored = result.path
      return result if stored.exist?

      stored.dirname.mkpath
      dst.rename(stored)
      result
    end

    # Size after the step: of result if worker succeeded, of source otherwise
    def size
      return dst_size if success

      src_size
    end

    # Path at which content identified by `cache` is stored
    def path
      ImageOptim::Path.convert("#{DIR}/worker-analysis/#{cache}")
    end

    def inspect
      mark = success ? '✓' : '✗'
      "<S:#{worker_id} #{mark} #{time}s #{src_size}→#{dst_size}>"
    end
  end

  # Result of running a chain of workers on one image: format of the image,
  # results of all steps and difference of final image from the original
  ChainResult = Struct.new(:format, :steps, :difference) do
    # Ids of workers in order of running
    def worker_ids
      steps.map{ |step| step.worker_id }
    end

    # Total time of all steps
    def time
      steps.sum{ |step| step.time }
    end

    # Size of the image before first step
    def src_size
      first_step = steps.first
      first_step.src_size
    end

    # Size of the image after last step
    def dst_size
      last_step = steps.last
      last_step.size
    end

    # Final size as a fraction of original size
    def ratio
      final_size = dst_size.to_f
      final_size / src_size
    end

    def inspect
      sizes = "#{src_size}→#{dst_size}"
      "<C #{sizes} %:#{difference} #{steps.inspect}>"
    end
  end

  # Run all possible worker chains
  #
  # Workers are run on the image one after another in every allowed order,
  # a ChainResult is produced after every step of every chain.
  class WorkerRunner
    # path    - image to run workers on
    # workers - worker variants to build chains from
    def initialize(path, workers)
      @path = ImageOptim::Path.convert(path)
      @workers = workers
    end

    # Run all chains and return their results as an array of ChainResult,
    # values remembered in memory during the run are dropped afterwards
    def results
      results = []
      run_workers(@path, @workers){ |result| results << result }
      run_cache.clear
      results
    end

  private

    # In memory cache: hash of hashes created on first access of a section
    def run_cache
      @run_cache ||= Hash.new{ |h, k| h[k] = {} }
    end

    # Iterate workers using `with_progress` while the chain built so far has
    # less than 3 steps, deeper levels are iterated using plain `each`
    def with_progress(workers, last_result, &block)
      if !last_result || last_result.steps.length < 3
        workers.with_progress(&block)
      else
        workers.each(&block)
      end
    end

    # Run every worker on src, yield a ChainResult extending last_result
    # with the new step, then continue recursively with the resulting image
    def run_workers(src, workers, last_result = nil, &block)
      required_workers = workers.select(&:required)
      with_progress(workers, last_result) do |worker|
        # Running this worker would exclude a required worker with lower
        # run order from the rest of the chain
        next if required_workers.any?{ |w| w.run_order < worker.run_order }
        worker_result, result_image = run_worker(src, worker)

        steps = (last_result ? last_result.steps : []) + [worker_result]
        chain_result = ChainResult.new(src.image_format, steps)
        chain_result.difference = difference_with(result_image)

        yield chain_result

        # Only workers with a different cons_id and not lower run order can
        # follow
        workers_left = workers.reject do |w|
          w.cons_id == worker.cons_id || w.run_order < worker.run_order
        end
        run_workers(result_image, workers_left, chain_result, &block)
      end
    end

    # Pair of StepResult and resulting image (src itself if worker didn't
    # succeed). Result is taken from Cache unless there is no valid entry or
    # the file of a successful result is missing, in which case worker is
    # run and the entry is written again.
    def run_worker(src, worker)
      run_cache[:run][[src.digest, worker.id]] ||= begin
        cache_args = [:result, [src.digest, worker.id], worker.etag]
        result = Cache.get(*cache_args)
        if !result || (result.success && !result.path.exist?)
          result = Cache.set(*cache_args) do
            StepResult.run(src, worker)
          end
        end
        [result, result.success ? result.path : src]
      end
    end

    # Distortion of other compared to the original image as reported by
    # `convert` using RMSE metric, cached by digests of both images
    def difference_with(other)
      run_cache[:difference][other.digest] ||=
      Cache.get(:difference, [@path.digest, other.digest].sort, nil) do
        images = for_compare(@path, other)

        nrmse = Cmd.capture(%W[
          convert
          #{images[0].image_format}:#{images[0]} -auto-orient
          #{images[1].image_format}:#{images[1]} -auto-orient
          -metric RMSE
          -compare
          -format %[distortion]
          info:
        ].shelljoin).to_f
        unless $CHILD_STATUS.success?
          fail "failed comparison of #{@path} with #{other}"
        end
        nrmse
      end
    end

    # Prepare images for comparison: gif images are flattened and if
    # `identify` reports alpha only for some of the images, noise is
    # underlaid under all of them
    def for_compare(*images)
      images.map!{ |image| flatten_animation(image) }

      alpha_presence = images.map do |image|
        # The path is interpolated as is: `shelljoin` escapes every word,
        # escaping the path beforehand would escape it twice and `identify`
        # would get backslashes as part of the file name
        !!Cmd.capture(%W[
          identify
          -format %A
          #{image.image_format}:#{image}
        ].shelljoin)[/true/i]
      end
      if alpha_presence.uniq.length != 1
        images.map!{ |image| underlay_noise(image) }
      end

      images
    end

    # For gif returns a temporary image created by `convert` with
    # `-coalesce -append`, other images are returned as is
    def flatten_animation(image)
      run_cache[:flatten][image.digest] ||= begin
        if image.image_format == :gif
          flattened = image.temp_path
          Cmd.run(*%W[
            convert
            #{image.image_format}:#{image}
            -coalesce
            -append
            #{image.image_format}:#{flattened}
          ]) || fail("failed flattening of #{image}")
          flattened
        else
          image
        end
      end
    end

    # Temporary image without alpha created by `convert` by flattening the
    # image over a copy of itself filled with random noise
    def underlay_noise(image)
      run_cache[:noise][image.digest] ||= begin
        with_noise = image.temp_path
        Cmd.run(*%W[
          convert
          #{image.image_format}:#{image}
          +noise Random
          #{image.image_format}:#{image}
          -flatten
          -alpha off
          #{image.image_format}:#{with_noise}
        ]) || fail("failed underlaying noise to #{image}")
        with_noise
      end
    end
  end

  # Statistics of results grouped by chains of workers
  class Stats
    # Numbers for all results of one chain of workers
    class Chain
      attr_reader :worker_stats, :unused_workers, :entry_count
      attr_reader :original_size, :optimized_size, :ratio, :avg_ratio
      attr_reader :avg_difference, :max_difference, :warn_level
      attr_reader :time, :avg_time, :speed

      # worker_ids - ids of workers of the chain
      # results    - results produced by the chain
      # ids2names  - hash used to get names of workers by their ids
      def initialize(worker_ids, results, ids2names)
        @worker_stats = build_worker_stats(worker_ids, results, ids2names)
        @unused_workers = worker_stats.any?{ |worker| worker.unused? }

        total = results.length
        differences = results.map{ |result| result.difference }

        @entry_count = results.count
        @original_size = results.sum{ |result| result.src_size }
        @optimized_size = results.sum{ |result| result.dst_size }
        @ratio = optimized_size.to_f / original_size
        @avg_ratio = results.sum{ |result| result.ratio } / total
        @avg_difference = differences.sum / total
        @max_difference = differences.max
        @time = results.sum{ |result| result.time }
        @avg_time = time / total

        @warn_level = calculate_warn_level
        @speed = calculate_speed
      end

    private

      # Stats for every worker of the chain built from steps of all results,
      # worker id is used as the name if there is no name for it
      def build_worker_stats(worker_ids, results, ids2names)
        all_steps = results.flat_map{ |result| result.steps }
        steps_by_worker_id = all_steps.group_by{ |step| step.worker_id }
        worker_ids.map do |worker_id|
          worker_name = ids2names[worker_id] || worker_id
          Worker.new(worker_name, steps_by_worker_id[worker_id])
        end
      end

      # Level by biggest difference, nil if it is smaller than 0.001
      def calculate_warn_level
        if max_difference >= 0.1
          'high'
        elsif max_difference >= 0.01
          'medium'
        elsif max_difference >= 0.001
          'low'
        end
      end

      # Saved size per unit of time; when no time was measured it is 0 if
      # nothing was saved and infinity otherwise
      def calculate_speed
        return (original_size - optimized_size) / time if time > 0
        return 0 if original_size == optimized_size

        Float::INFINITY
      end
    end

    # Usage of one worker in steps of results
    class Worker
      attr_reader :name, :success_count, :time, :avg_time

      def initialize(name, steps)
        @name = name
        @success_count = steps.count{ |step| step.success }
        @time = steps.sum{ |step| step.time }
        @avg_time = time / steps.length
      end

      # True if none of the steps succeeded
      def unused?
        success_count == 0
      end
    end

    attr_reader :name, :results, :ids2names

    def initialize(name, results, ids2names)
      @name = name.to_s
      @results = results
      @ids2names = ids2names
    end

    # Build chain stats for results grouped by ids of workers and iterate
    # them ordered by optimized size, then by time
    def each_chain(&block)
      grouped = results.group_by{ |result| result.worker_ids }
      chains = grouped.map do |worker_ids, chain_results|
        Chain.new(worker_ids, chain_results, ids2names)
      end
      chains.sort_by!{ |chain| [chain.optimized_size, chain.time] }
      chains.each(&block)
    end
  end

  # Create worker variants as described by config
  #
  # Config is a hash with bin names of workers as keys. `false` disables the
  # worker, a hash can contain `variants` (an array of option hashes or a
  # hash combined using Hash#variants), everything else in the hash is added
  # to options of every variant. A worker without config gets one variant
  # without options. Fails if there are keys not matching any worker.
  def initialize(config)
    remaining = HashHelpers.deep_symbolise_keys(config)
    image_optim = ImageOptim.new
    @workers_by_format = Hash.new{ |by_format, format| by_format[format] = [] }

    ImageOptim::Worker.klasses.each do |klass|
      shared = remaining.delete(klass.bin_sym)
      next if shared == false

      shared ||= {}
      declared = shared.delete(:variants) || [{}]
      unless declared.is_a?(Array) || declared.is_a?(Hash)
        fail "Array or Hash expected, got #{declared}"
      end
      combinations = declared.is_a?(Hash) ? declared.variants : declared

      combinations.each do |variant|
        options = HashHelpers.deep_symbolise_keys(variant).merge(shared)
        worker = WorkerVariant.new(klass, image_optim, options)
        worker.image_formats.each{ |format| @workers_by_format[format] << worker }
      end
    end

    log_workers_by_format

    fail "unknown variants: #{remaining}" unless remaining.empty?
  end

  # Run all worker chains on images at paths and for every image format
  # with results write a report to `worker-analysis-<format>.html` in DIR
  #
  # The report is rendered using the ERB template at template_path, the
  # template is evaluated in the context of an OpenStruct with:
  #
  #   stats_format - format of the report
  #   stats        - Stats for results of that format
  #   format_links - hash of all formats to file names of their reports
  #   template_dir - template directory relative to directory of the report
  def analyse(paths)
    results = collect_results(paths)

    # Trim mode is passed as a keyword: positional safe_level and trim_mode
    # arguments of ERB.new are deprecated and cause warnings or an error
    # depending on the version of erb
    template = ERB.new(template_path.read, trim_mode: '>')
    by_format = results.group_by(&:format)
    formats = by_format.keys.sort
    basenames = Hash[formats.map do |format|
      [format, "worker-analysis-#{format}.html"]
    end]
    formats.each do |format|
      stats = Stats.new('all', by_format[format], worker_ids2names)
      path = FSPath("#{DIR}/#{basenames[format]}")
      model = {
        :stats_format => format,
        :stats => stats,
        :format_links => basenames,
        :template_dir => template_path.dirname.relative_path_from(path.dirname),
      }
      html = template.result(OpenStruct.new(model).instance_eval{ binding })
      path.write(html)
      puts "Created #{path}"
    end
  end

private

  # Hash of ids of all worker variants to their names
  def worker_ids2names
    all_workers = @workers_by_format.values.flatten
    all_workers.each_with_object({}) do |worker, names|
      names[worker.id] = worker.name
    end
  end

  # Run worker chains for every usable path in random order showing
  # progress, returns results of all images in one array
  def collect_results(paths)
    shuffled = process_paths(paths).shuffle
    shuffled.with_progress.flat_map do |path|
      runner = WorkerRunner.new(path, workers_for_image(path))
      runner.results
    end
  end

  # Convert paths to ImageOptim::Path and keep only existing image files
  # for which there is at least one worker, a warning is printed for every
  # dropped path
  def process_paths(paths)
    paths = paths.map{ |path| ImageOptim::Path.convert(path) }
    # warn returns nil, so a path for which the check fails is dropped
    paths.select!{ |path| path.exist? || warn("#{path} doesn't exist") }
    paths.select!{ |path| path.file? || warn("#{path} is not a file") }
    paths.select!{ |path| path.image_format || warn("#{path} is not an image") }
    paths.select! do |path|
      # The list of workers can be empty: an empty array is truthy and must
      # be checked explicitly
      workers = workers_for_image(path)
      (workers && !workers.empty?) ||
        warn("#{path} can't be handled by any worker")
    end
    paths
  end

  # Worker variants registered for the image format of path
  def workers_for_image(path)
    format = ImageOptim::Path.convert(path).image_format
    @workers_by_format[format]
  end

  # Print names with run orders of worker variants for every format,
  # variants are ordered by run order keeping original order for equal ones
  def log_workers_by_format
    @workers_by_format.each do |format, workers|
      puts "#{format}:"
      ordered = workers.each_with_index.sort_by do |worker, index|
        [worker.run_order, index]
      end
      ordered.each do |worker, _index|
        puts "  #{worker.name} [#{worker.run_order}]"
      end
    end
  end

  # Path of the ERB template: file named as this script with `.erb` added in
  # `template` directory next to this script
  def template_path
    script_dir = File.dirname(__FILE__)
    script_name = File.basename(__FILE__)
    FSPath("#{script_dir}/template/#{script_name}.erb")
  end
end

# Without paths there is nothing to analyse: print usage and exit with
# failure
if ARGV.empty?
  abort <<-HELP
Specify paths for analysis.

Example of `.analysis_variants.yml`:
  jhead:
    required: true # don't skip this worker
  jpegtran: # 3 worker variants
    variants:
      - jpegrescan: true
      - progressive: true
      - progressive: false
  optipng: # 6 worker variants by combining options
    variants:
      level: [6, 7]
      interlace: [true, false, nil]
  gifsicle: # allow variants with different interlace to run consecutively
    variants:
      allow_consecutive_on: interlace
      interlace: [true, false]
      careful: [true, false]
  # other workers will be used with default options
  HELP
end

# Read variants from `.analysis_variants.yml` in current directory and
# analyse images given as arguments. A missing file causes only a warning
# and an empty file is treated as an empty config, both resulting in all
# workers used with default options.
Analyser.new(begin
  path = '.analysis_variants.yml'
  case h = YAML.load_file(path)
  when Hash then h
  # An empty file is loaded as false or as nil depending on version of Psych
  when false, nil then {}
  else abort "expected a hash in #{path}"
  end
rescue Errno::ENOENT => e
  warn e
  {}
end).analyse(ARGV)
