/*
 * Decompiled with CFR 0.152.
 */
package com.googlecode.clearnlp.pos;

import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.googlecode.clearnlp.classification.model.StringModel;
import com.googlecode.clearnlp.classification.prediction.StringPrediction;
import com.googlecode.clearnlp.classification.train.StringTrainSpace;
import com.googlecode.clearnlp.classification.vector.StringFeatureVector;
import com.googlecode.clearnlp.engine.AbstractEngine;
import com.googlecode.clearnlp.engine.EngineProcess;
import com.googlecode.clearnlp.feature.xml.FtrToken;
import com.googlecode.clearnlp.feature.xml.POSFtrXml;
import com.googlecode.clearnlp.pos.POSLib;
import com.googlecode.clearnlp.pos.POSNode;
import com.googlecode.clearnlp.util.UTInput;
import com.googlecode.clearnlp.util.UTOutput;
import com.googlecode.clearnlp.util.UTString;
import com.googlecode.clearnlp.util.map.Prob1DMap;
import com.googlecode.clearnlp.util.map.Prob2DMap;
import com.googlecode.clearnlp.util.pair.StringDoublePair;
import java.io.BufferedReader;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;

/**
 * Part-of-speech tagger built on top of {@link AbstractEngine}.
 *
 * <p>The behaviour of {@link #tag(POSNode[])} depends on the flag handed to the
 * super constructor (read back through {@code i_flag}):
 * <ul>
 *   <li>flag 0 - only collects form counts and form/tag counts for nodes whose
 *       lemma is in the lemma set;</li>
 *   <li>flag 1 - extracts a feature vector per node and adds it, with the gold
 *       label, to the training space;</li>
 *   <li>flag 2 - extracts a feature vector per node and assigns the label
 *       predicted by the model.</li>
 * </ul>
 *
 * <p>Instances keep per-sentence state ({@code p_nodes}, {@code i_source}) in
 * fields while tagging.
 */
public class POSTagger
extends AbstractEngine {
    /** Placeholder tag written to every node before it has been tagged. */
    private static final String UNTAGGED = "_N_";

    private Set<String> s_lemmas;
    private Prob2DMap p_ambi;
    private Map<String, String> m_ambi;
    private Prob1DMap p_forms;
    private Set<String> s_forms;
    private POSFtrXml f_xml;
    private StringTrainSpace s_space;
    private StringModel s_model;
    private POSNode[] p_nodes;
    private String[] g_labels;
    private int i_source;
    private double d_denominator;

    /**
     * Creates a tagger with flag 0, which only collects lexica.
     *
     * @param sLemma lemmas whose forms are counted by {@link #tag(POSNode[])}
     */
    public POSTagger(Set<String> sLemma) {
        super((byte)0);
        this.s_lemmas = sLemma;
        this.p_ambi = new Prob2DMap();
        this.p_forms = new Prob1DMap();
    }

    /**
     * Creates a tagger with flag 1, which adds training instances to {@code trainSpace}.
     *
     * @param xml          feature template
     * @param sLemmas      lemma set consulted by the "f" and "m" features
     * @param sForms       form set used by {@link #getCosineSimilarity(POSNode[])}
     * @param ambiguityMap map from simplified form to its ambiguity-class string
     * @param trainSpace   training space that receives the instances
     */
    public POSTagger(POSFtrXml xml, Set<String> sLemmas, Set<String> sForms, Map<String, String> ambiguityMap, StringTrainSpace trainSpace) {
        super((byte)1);
        this.f_xml = xml;
        this.s_lemmas = sLemmas;
        this.s_forms = sForms;
        this.m_ambi = ambiguityMap;
        this.s_space = trainSpace;
    }

    /**
     * Creates a tagger with flag 2, which predicts tags with {@code model}.
     *
     * @param xml          feature template
     * @param sLemmas      lemma set consulted by the "f" and "m" features
     * @param sForms       form set used by {@link #getCosineSimilarity(POSNode[])}
     * @param ambiguityMap map from simplified form to its ambiguity-class string
     * @param model        model used for prediction
     */
    public POSTagger(POSFtrXml xml, Set<String> sLemmas, Set<String> sForms, Map<String, String> ambiguityMap, StringModel model) {
        super((byte)2);
        this.f_xml = xml;
        this.s_lemmas = sLemmas;
        this.s_forms = sForms;
        this.m_ambi = ambiguityMap;
        this.s_model = model;
        this.d_denominator = Math.sqrt(sForms.size());
    }

    /**
     * Creates a tagger with flag 2 whose lexica and model are read from {@code fin}.
     *
     * @param xml feature template
     * @param fin reader positioned at data in the layout written by {@link #saveModel(PrintStream)}
     * @throws IllegalStateException if the lexica cannot be read
     */
    public POSTagger(POSFtrXml xml, BufferedReader fin) {
        super((byte)2);
        this.f_xml = xml;
        this.loadModel(fin);
    }

    /**
     * Reads the lemma set, the form set, the ambiguity map and the model from
     * {@code fin}, in that order, and caches {@code sqrt(|forms|)} for
     * {@link #getCosineSimilarity(POSNode[])}.
     *
     * @param fin reader to load from
     * @throws IllegalStateException if reading any of the lexica fails; the
     *         original exception is kept as the cause
     */
    public void loadModel(BufferedReader fin) {
        try {
            this.s_lemmas = UTInput.getStringSet(fin);
            this.s_forms = UTInput.getStringSet(fin);
            this.m_ambi = UTInput.getStringMap(fin, " ");
        }
        catch (Exception e) {
            // Fail fast: continuing would leave the lexica partially unset and the
            // reader at an unknown position for the model that follows.
            throw new IllegalStateException("Failed to read the POS tagger lexica", e);
        }
        this.s_model = new StringModel(fin);
        this.d_denominator = Math.sqrt(this.s_forms.size());
    }

    /**
     * Writes the lemma set, the form set, the ambiguity map and the model to
     * {@code fout}, in the order {@link #loadModel(BufferedReader)} reads them.
     *
     * @param fout stream to write to
     */
    @Override
    public void saveModel(PrintStream fout) {
        UTOutput.printSet(fout, this.s_lemmas);
        UTOutput.printSet(fout, this.s_forms);
        UTOutput.printMap(fout, this.m_ambi, " ");
        this.s_model.save(fout);
    }

    /**
     * Prepares the tagger for a new sentence: remembers {@code nodes}, saves the
     * gold labels when the flag is 1, and resets every node's tag to the
     * untagged placeholder.
     *
     * @param nodes nodes of the sentence about to be tagged
     */
    public void init(POSNode[] nodes) {
        this.p_nodes = nodes;
        if (this.i_flag == 1) {
            // The gold labels must be captured before the tags are overwritten below.
            this.g_labels = POSLib.getLabels(nodes);
        }
        for (POSNode node : nodes) {
            node.pos = UNTAGGED;
        }
    }

    /**
     * Processes one sentence according to the engine flag: collects lexica
     * (flag 0), adds training instances (flag 1) or predicts tags (flag 2).
     * Forms are normalized first in every mode.
     *
     * @param nodes nodes of the sentence, processed left to right
     */
    public void tag(POSNode[] nodes) {
        EngineProcess.normalizeForms(nodes);
        if (this.i_flag == 0) {
            this.addLexica(nodes);
            return;
        }
        this.init(nodes);
        int size = nodes.length;
        // i_source is a field because getNode() resolves feature offsets relative to it.
        this.i_source = 0;
        while (this.i_source < size) {
            StringFeatureVector vector = this.getFeatureVector(this.f_xml);
            if (this.i_flag == 1) {
                this.train(vector);
            } else if (this.i_flag == 2) {
                this.predict(vector);
            }
            ++this.i_source;
        }
    }

    /**
     * Restores the gold tag on the current node and, if the vector is not
     * empty, adds it with that tag to the training space.
     */
    private void train(StringFeatureVector vector) {
        String label = this.g_labels[this.i_source];
        this.p_nodes[this.i_source].pos = label;
        if (vector.size() > 0) {
            this.s_space.addInstance(label, vector);
        }
    }

    /** Assigns the model's best label for {@code vector} to the current node. */
    private void predict(StringFeatureVector vector) {
        StringPrediction p = this.s_model.predictBest(vector);
        this.p_nodes[this.i_source].pos = p.label;
    }

    /**
     * Counts the simplified form, and the (simplified form, tag) pair, of every
     * node whose lemma is in the lemma set.
     */
    private void addLexica(POSNode[] nodes) {
        for (POSNode node : nodes) {
            if (!this.s_lemmas.contains(node.lemma)) continue;
            this.p_forms.add(node.simplifiedForm);
            this.p_ambi.add(node.simplifiedForm, node.pos);
        }
    }

    /**
     * Builds the ambiguity map from the collected form/tag counts. For each
     * form, the tags whose probability is strictly greater than {@code cutoff}
     * are joined with {@code "_"} in sorted order; forms with no such tag are
     * left out.
     *
     * @param cutoff probability threshold (exclusive)
     * @return map from form to its joined tag string
     */
    public Map<String, String> getAmbiguityMap(double cutoff) {
        Map<String, String> mAmbi = new HashMap<String, String>();
        for (String form : this.p_ambi.keySet()) {
            StringDoublePair[] ps = this.p_ambi.getProb1D(form);
            // The early break below relies on the pairs' natural ordering putting
            // higher probabilities first - NOTE(review): confirm against StringDoublePair.
            Arrays.sort(ps);
            StringBuilder build = new StringBuilder();
            for (StringDoublePair p : ps) {
                if (p.d <= cutoff) break;
                build.append("_");
                build.append(p.s);
            }
            if (build.length() <= 0) continue;
            // Drop the leading separator.
            mAmbi.put(form, build.substring(1));
        }
        return mAmbi;
    }

    /**
     * Returns the collected forms whose count is strictly greater than {@code cutoff}.
     *
     * @param cutoff count threshold (exclusive)
     * @return a new set with the qualifying forms
     */
    public Set<String> getFormSet(int cutoff) {
        Set<String> set = new HashSet<String>();
        for (ObjectCursor<String> cur : this.p_forms.keys()) {
            String key = cur.value;
            if (this.p_forms.get(key) > cutoff) {
                set.add(key);
            }
        }
        return set;
    }

    /**
     * Empties the form set. The cached denominator used by
     * {@link #getCosineSimilarity(POSNode[])} is not recomputed.
     */
    public void clearFormSet() {
        this.s_forms.clear();
    }

    /**
     * Computes {@code |F ∩ S| / (sqrt(|F|) * sqrt(|S|))}, where {@code F} is the
     * set of distinct simplified forms in {@code nodes} and {@code S} is the
     * tagger's form set ({@code sqrt(|S|)} is the value cached when the model
     * was set or loaded).
     *
     * @param nodes nodes of the sentence to compare against the form set
     * @return the similarity, or {@code 0} when the denominator is zero (no
     *         nodes, an empty form set, or no cached denominator)
     */
    public double getCosineSimilarity(POSNode[] nodes) {
        Set<String> set = new HashSet<String>();
        for (POSNode node : nodes) {
            set.add(node.simplifiedForm);
        }
        double d = Math.sqrt(set.size()) * this.d_denominator;
        if (d == 0.0) {
            // Avoids NaN/Infinity from a division by zero; checked before the form
            // set is touched because it may be unset when nothing was cached.
            return 0.0;
        }
        set.retainAll(this.s_forms);
        return (double)set.size() / d;
    }

    /**
     * Returns the value of a single-valued feature for the node addressed by
     * {@code token}, or {@code null} when the feature does not apply.
     *
     * <ul>
     *   <li>"f" - simplified form, only if the node's lemma is in the lemma set;</li>
     *   <li>"m" - lemma, only if it is in the lemma set;</li>
     *   <li>"p" - tag, unless the node is still untagged;</li>
     *   <li>"a" - ambiguity-class string looked up by simplified form;</li>
     *   <li>fields matching {@code POSFtrXml.P_BOOLEAN} - the field name itself
     *       when the numbered orthographic test holds.</li>
     * </ul>
     */
    @Override
    protected String getField(FtrToken token) {
        POSNode node = this.getNode(token);
        if (node == null) {
            return null;
        }
        if (token.isField("f")) {
            return this.s_lemmas.contains(node.lemma) ? node.simplifiedForm : null;
        }
        if (token.isField("m")) {
            return this.s_lemmas.contains(node.lemma) ? node.lemma : null;
        }
        if (token.isField("p")) {
            return node.isPos(UNTAGGED) ? null : node.pos;
        }
        if (token.isField("a")) {
            return this.m_ambi.get(node.simplifiedForm);
        }
        Matcher m = POSFtrXml.P_BOOLEAN.matcher(token.field);
        if (m.find()) {
            return this.getBooleanField(node, token, Integer.parseInt(m.group(1)));
        }
        return null;
    }

    /**
     * Evaluates the orthographic test numbered {@code field} on the node's
     * simplified form and returns {@code token.field} when it holds, otherwise
     * {@code null}. Unknown numbers yield {@code null}.
     */
    private String getBooleanField(POSNode node, FtrToken token, int field) {
        String form = node.simplifiedForm;
        boolean holds;
        switch (field) {
            case 0: {
                holds = UTString.isAllUpperCase(form);
                break;
            }
            case 1: {
                holds = UTString.isAllLowerCase(form);
                break;
            }
            case 2: {
                holds = UTString.beginsWithUpperCase(form);
                break;
            }
            case 3: {
                holds = UTString.getNumOfCapitalsNotAtBeginning(form) == 1;
                break;
            }
            case 4: {
                holds = UTString.getNumOfCapitalsNotAtBeginning(form) > 1;
                break;
            }
            case 5: {
                holds = form.contains(".");
                break;
            }
            case 6: {
                holds = UTString.containsDigit(form);
                break;
            }
            case 7: {
                holds = form.contains("-");
                break;
            }
            default: {
                return null;
            }
        }
        return holds ? token.field : null;
    }

    /**
     * Returns the values of a multi-valued feature for the node addressed by
     * {@code token}: lemma prefixes for fields matching
     * {@code POSFtrXml.P_PREFIX}, lemma suffixes for fields matching
     * {@code POSFtrXml.P_SUFFIX}. The integer captured by the pattern is passed
     * on to the {@code UTString} helper.
     *
     * @return the values, or {@code null} when there is no such node, the field
     *         matches neither pattern, or the helper returns an empty array
     */
    @Override
    protected String[] getFields(FtrToken token) {
        POSNode node = this.getNode(token);
        if (node == null) {
            return null;
        }
        String[] fields;
        Matcher m = POSFtrXml.P_PREFIX.matcher(token.field);
        if (m.find()) {
            fields = UTString.getPrefixes(node.lemma, Integer.parseInt(m.group(1)));
        } else {
            m = POSFtrXml.P_SUFFIX.matcher(token.field);
            if (!m.find()) {
                return null;
            }
            fields = UTString.getSuffixes(node.lemma, Integer.parseInt(m.group(1)));
        }
        return fields.length == 0 ? null : fields;
    }

    /**
     * Resolves the node at {@code i_source + token.offset}.
     *
     * @return the node, or {@code null} when the index is outside the sentence
     */
    private POSNode getNode(FtrToken token) {
        int index = this.i_source + token.offset;
        if (index < 0 || index >= this.p_nodes.length) {
            return null;
        }
        return this.p_nodes[index];
    }
}

