/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.ml.feature.transform.extractor;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
import org.cleartk.ml.Instance;
import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
import org.cleartk.ml.feature.transform.TransformableFeature;
import org.cleartk.ml.feature.transform.extractor.FixedCosineSimilarity;
import org.cleartk.ml.feature.transform.extractor.SimilarityFunction;
import org.cleartk.ml.feature.transform.extractor.TfidfExtractor;

/**
 * Feature extractor that reduces the term-frequency features produced by a sub-extractor to a
 * single numeric feature: the value returned by a {@link SimilarityFunction} when comparing the
 * TF-IDF weighted term vector of the focus annotation against a centroid TF-IDF vector computed
 * from the training instances.
 *
 * <p>Before training (or loading) the extractor emits a {@link TransformableFeature} wrapping the
 * raw sub-extractor features; after {@link #train(Iterable)} or {@link #load(URI)} it emits the
 * similarity feature directly.
 *
 * @param <OUTCOME_T> outcome type of the instances being transformed
 * @param <FOCUS_T> annotation type the sub-extractor operates on
 */
public class CentroidTfidfSimilarityExtractor<OUTCOME_T, FOCUS_T extends Annotation>
extends TfidfExtractor<OUTCOME_T, FOCUS_T> {
    /** File-name suffix appended to the extractor name for the document-frequency data file. */
    private static final String DOC_FREQ_FILE_SUFFIX = "_tfidf-centroid-extractor_idfmap.dat";

    /** File-name suffix appended to the extractor name for the centroid data file. */
    private static final String CENTROID_MAP_FILE_SUFFIX = "_tfidf-centroid-extractor_centroidmap.dat";

    /** Mean TF-IDF value per term; set by {@link #train(Iterable)} or {@link #load(URI)}. */
    private Map<String, Double> centroidMap;

    /** Function used to score a term vector against {@link #centroidMap}. */
    private SimilarityFunction simFunction;

    /**
     * Resolves the location of the document-frequency data file for the named extractor.
     *
     * @param name extractor name, used as the file-name prefix
     * @param baseURI URI the file name is resolved against
     * @return URI of the document-frequency data file
     * @throws MalformedURLException if {@code baseURI} cannot be converted to a URL
     * @throws URISyntaxException if the resolved URL is not a valid URI
     */
    public static URI getDocumentFrequencyDataURI(String name, URI baseURI) throws MalformedURLException, URISyntaxException {
        return new URL(baseURI.toURL(), name + DOC_FREQ_FILE_SUFFIX).toURI();
    }

    /**
     * Resolves the location of the centroid data file for the named extractor.
     *
     * @param name extractor name, used as the file-name prefix
     * @param baseURI URI the file name is resolved against
     * @return URI of the centroid data file
     * @throws MalformedURLException if {@code baseURI} cannot be converted to a URL
     * @throws URISyntaxException if the resolved URL is not a valid URI
     */
    public static URI getCentroidDataURI(String name, URI baseURI) throws MalformedURLException, URISyntaxException {
        return new URL(baseURI.toURL(), name + CENTROID_MAP_FILE_SUFFIX).toURI();
    }

    /**
     * Creates an extractor with the given name and whatever state the superclass constructor
     * sets up.
     *
     * @param name name of the extractor; also used as the name of the emitted feature
     */
    public CentroidTfidfSimilarityExtractor(String name) {
        super(name);
    }

    /**
     * Creates an untrained extractor that delegates raw feature extraction to {@code extractor}.
     *
     * @param name name of the extractor; also used as the name of the emitted feature
     * @param extractor sub-extractor producing the term-frequency features
     */
    public CentroidTfidfSimilarityExtractor(String name, FeatureExtractor1<FOCUS_T> extractor) {
        super(name);
        this.subExtractor = extractor;
        this.isTrained = false;
        this.idfMap = new TfidfExtractor.IDFMap();
    }

    /**
     * Replaces the transformable features of {@code instance} with a single similarity feature.
     *
     * <p>Features for which {@code isTransformable} is false are copied through unchanged. The
     * sub-features of all transformable features are pooled, weighted via
     * {@link #featuresToFeatureMap(List)} and scored against the centroid.
     *
     * @param instance instance whose features are to be transformed
     * @return a new instance with the same outcome, the untouched features and the appended
     *         similarity feature
     */
    @Override
    public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
        List<Feature> features = new ArrayList<Feature>();
        List<Feature> featuresToTransform = new ArrayList<Feature>();
        for (Feature feature : instance.getFeatures()) {
            if (this.isTransformable(feature)) {
                featuresToTransform.addAll(((TransformableFeature) feature).getFeatures());
            } else {
                features.add(feature);
            }
        }
        Map<String, Double> featureMap = this.featuresToFeatureMap(featuresToTransform);
        double similarity = this.simFunction.distance(featureMap, this.centroidMap);
        // Double.valueOf replaces the deprecated Double(double) constructor.
        features.add(new Feature(this.name, Double.valueOf(similarity)));
        return new Instance<OUTCOME_T>(instance.getOutcome(), features);
    }

    /**
     * Converts term-frequency features into a term-to-TF-IDF map using the current IDF map.
     *
     * <p>Each feature value is cast to {@link Integer}, so a non-integer value raises a
     * {@link ClassCastException}. If several features share a name, the last one wins.
     *
     * @param features term-frequency features (feature name is the term, value is the count)
     * @return map from term name to {@code tf * idf}
     */
    public Map<String, Double> featuresToFeatureMap(List<Feature> features) {
        Map<String, Double> featureMap = new HashMap<String, Double>();
        for (Feature feature : features) {
            String termName = feature.getName();
            int tf = (Integer) feature.getValue();
            featureMap.put(termName, (double) tf * this.idfMap.getIDF(termName));
        }
        return featureMap;
    }

    /**
     * Extracts features for {@code focusAnnotation} via the sub-extractor.
     *
     * @param view JCas view to extract from
     * @param focusAnnotation annotation to extract features for
     * @return a single-element list: the similarity feature when trained, otherwise a
     *         {@link TransformableFeature} wrapping the raw sub-extractor features
     * @throws CleartkExtractorException if the sub-extractor fails
     */
    @Override
    public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException {
        List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation);
        List<Feature> result = new ArrayList<Feature>();
        if (this.isTrained) {
            Map<String, Double> extractedFeatureMap = this.featuresToFeatureMap(extracted);
            result.add(new Feature(this.name, this.simFunction.distance(extractedFeatureMap, this.centroidMap)));
        } else {
            result.add(new TransformableFeature(this.name, extracted));
        }
        return result;
    }

    /**
     * Computes the centroid TF-IDF vector over all transformable features of {@code instances}.
     *
     * <p>For every term the TF-IDF values are summed across instances and then divided by
     * {@code idfs.getTotalDocumentCount()}.
     *
     * @param instances instances whose transformable features contribute to the centroid
     * @param idfs IDF map supplying per-term IDF values and the total document count
     * @return map from term name to mean TF-IDF value
     */
    protected Map<String, Double> computeCentroid(Iterable<Instance<OUTCOME_T>> instances, TfidfExtractor.IDFMap idfs) {
        int numDocuments = idfs.getTotalDocumentCount();
        Map<String, Double> newCentroidMap = new HashMap<String, Double>();

        // First pass: accumulate the per-term TF-IDF sums.
        for (Instance<OUTCOME_T> instance : instances) {
            for (Feature feature : instance.getFeatures()) {
                if (!this.isTransformable(feature)) {
                    continue;
                }
                for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) {
                    String termName = untransformedFeature.getName();
                    int tf = (Integer) untransformedFeature.getValue();
                    double tfidf = (double) tf * idfs.getIDF(termName);
                    Double runningSum = newCentroidMap.get(termName);
                    double sumTfidf = runningSum == null ? 0.0 : runningSum.doubleValue();
                    newCentroidMap.put(termName, sumTfidf + tfidf);
                }
            }
        }

        // Second pass: turn sums into means. setValue updates in place, avoiding a put() on the
        // map while its entry set is being iterated.
        for (Map.Entry<String, Double> entry : newCentroidMap.entrySet()) {
            entry.setValue(entry.getValue() / (double) numDocuments);
        }
        return newCentroidMap;
    }

    /**
     * Trains the extractor: builds the IDF map, computes the centroid and creates the similarity
     * function. {@code instances} is iterated more than once, so it must be re-iterable.
     *
     * @param instances training instances containing transformable features
     */
    @Override
    public void train(Iterable<Instance<OUTCOME_T>> instances) {
        this.idfMap = this.createIdfMap(instances);
        this.centroidMap = this.computeCentroid(instances, this.idfMap);
        this.isTrained = true;
        this.simFunction = new FixedCosineSimilarity(this.centroidMap);
    }

    /**
     * Saves the IDF map and the centroid map to files resolved against {@code baseURI}.
     *
     * <p>The centroid file holds one {@code term<TAB>value} line per term. Values are written
     * with {@link Double#toString(double)} so that {@link #load(URI)} restores them exactly; a
     * fixed {@code %f} format would truncate small means to six fractional digits.
     *
     * @param baseURI base location for the data files
     * @throws IOException if a data URI cannot be built or writing fails
     */
    @Override
    public void save(URI baseURI) throws IOException {
        URI centroidDataURI;
        URI documentFreqDataURI;
        try {
            documentFreqDataURI = CentroidTfidfSimilarityExtractor.getDocumentFrequencyDataURI(this.name, baseURI);
            centroidDataURI = CentroidTfidfSimilarityExtractor.getCentroidDataURI(this.name, baseURI);
        }
        catch (URISyntaxException e) {
            throw new IOException(e);
        }
        this.idfMap.save(documentFreqDataURI);

        BufferedWriter writer = new BufferedWriter(new FileWriter(new File(centroidDataURI)));
        try {
            for (Map.Entry<String, Double> entry : this.centroidMap.entrySet()) {
                writer.append(String.format(Locale.ROOT, "%s\t%s\n", entry.getKey(), Double.toString(entry.getValue())));
            }
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            writer.close();
        }
    }

    /**
     * Loads the IDF map and the centroid map from files resolved against {@code baseURI} and
     * marks the extractor as trained.
     *
     * <p>The centroid map is only replaced once the whole file has been read successfully.
     *
     * @param baseURI base location of the data files
     * @throws IOException if a data URI cannot be built, reading fails, or the centroid file
     *         contains a line that is not {@code term<TAB>number}
     */
    @Override
    public void load(URI baseURI) throws IOException {
        URI centroidDataURI;
        URI documentFreqDataURI;
        try {
            documentFreqDataURI = CentroidTfidfSimilarityExtractor.getDocumentFrequencyDataURI(this.name, baseURI);
            centroidDataURI = CentroidTfidfSimilarityExtractor.getCentroidDataURI(this.name, baseURI);
        }
        catch (URISyntaxException e) {
            throw new IOException(e);
        }
        this.idfMap.load(documentFreqDataURI);

        Map<String, Double> loadedCentroid = new HashMap<String, Double>();
        BufferedReader reader = new BufferedReader(new FileReader(new File(centroidDataURI)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] featureMeanTfidf = line.split("\\t");
                if (featureMeanTfidf.length < 2) {
                    throw new IOException("Malformed centroid entry in " + centroidDataURI + ": \"" + line + "\"");
                }
                double tfidf;
                try {
                    tfidf = Double.parseDouble(featureMeanTfidf[1]);
                } catch (NumberFormatException e) {
                    throw new IOException("Malformed centroid value in " + centroidDataURI + ": \"" + line + "\"", e);
                }
                loadedCentroid.put(featureMeanTfidf[0], tfidf);
            }
        } finally {
            // Close even when reading or parsing fails so the file handle is not leaked.
            reader.close();
        }

        this.centroidMap = loadedCentroid;
        this.simFunction = new FixedCosineSimilarity(this.centroidMap);
        this.isTrained = true;
    }
}

