/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.coreference.ae.features.cluster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.ListIterable;
import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.ctakes.utils.distsem.WordEmbeddings;
import org.apache.ctakes.utils.distsem.WordVector;
import org.apache.ctakes.utils.distsem.WordVectorReader;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;

/**
 * Relation feature extractor that compares a candidate mention against the
 * members of a cluster using word embeddings.
 *
 * <p>For a non-pronoun mention, the lower-cased nominal head of the mention is
 * compared with the lower-cased nominal head of each cluster member visited,
 * and the largest embedding similarity is emitted as the single feature
 * {@code HEAD_SIMILARITY_WORD2VEC}. If any visited member has exactly the same
 * head string as the mention, the emitted value is forced to {@code 0.0}.
 */
public class MentionClusterDistSemExtractor
implements RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
    /** Public constant kept for callers; it is not referenced inside this class. */
    public static final double DEFAULT_SIM = 0.5;

    /** Embedding table used for head-word similarity and phrase vectors. */
    private final WordEmbeddings words;

    /**
     * Creates an extractor backed by the default embeddings resource.
     *
     * @throws FileNotFoundException declared by the delegated constructor
     * @throws IOException declared by the delegated constructor
     */
    public MentionClusterDistSemExtractor() throws FileNotFoundException, IOException {
        this("org/apache/ctakes/coreference/distsem/mimic_vectors.txt");
    }

    /**
     * Creates an extractor backed by the embeddings found at the given location.
     *
     * @param embeddingsPath location handed to {@code FileLocator.getAsStream}
     * @throws FileNotFoundException declared for failures locating the resource
     * @throws IOException declared for failures reading or closing the stream
     */
    public MentionClusterDistSemExtractor(String embeddingsPath) throws FileNotFoundException, IOException {
        // This constructor opens the stream, so it is responsible for closing it;
        // closing an already-closed InputStream is harmless if the reader closed it too.
        try (InputStream embeddingsStream = FileLocator.getAsStream(embeddingsPath)) {
            this.words = WordVectorReader.getEmbeddings(embeddingsStream);
        }
    }

    /**
     * Computes the embedding-similarity feature for a (cluster, mention) pair.
     *
     * @param jCas the CAS used to look up nominal head nodes
     * @param cluster the cluster whose members are compared with the mention
     * @param mention the candidate mention
     * @return an empty list when the mention is a pronoun; otherwise a list with
     *         the single feature {@code HEAD_SIMILARITY_WORD2VEC}
     * @throws AnalysisEngineProcessException declared by the implemented interface
     */
    public List<Feature> extract(JCas jCas, CollectionTextRelation cluster, IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
        List<Feature> feats = new ArrayList<Feature>();
        if (StringMatchingFeatureExtractor.isPronoun(mention)) {
            return feats;
        }

        double maxSim = 0.0;
        boolean exactMatch = false;

        ConllDependencyNode mentionNode = DependencyUtility.getNominalHeadNode(jCas, mention);
        String mentionHead = mentionNode != null ? mentionNode.getCoveredText().toLowerCase() : null;

        if (mentionHead != null) {
            double[] mentionVec = this.getPhraseVec(mention);
            // NOTE(review): the phrase-level similarity is tracked here but is not
            // emitted as a feature below -- confirm whether that is intentional.
            double maxPhraseSim = 0.0;

            for (Markable member : new ListIterable<Markable>(cluster.getMembers())) {
                // Stop at the first member that ends after the mention begins.
                // Presumably members are ordered by document position -- verify against caller.
                if (mention.getBegin() < member.getEnd()) {
                    break;
                }

                // Both vectors are unit length (or all zeros), so the dot product is their cosine.
                double[] memberVec = this.getPhraseVec(member);
                double phraseSim = 0.0;
                for (int i = 0; i < memberVec.length; ++i) {
                    phraseSim += mentionVec[i] * memberVec[i];
                }
                if (phraseSim > maxPhraseSim) {
                    maxPhraseSim = phraseSim;
                }

                ConllDependencyNode memberNode = DependencyUtility.getNominalHeadNode(jCas, member);
                String memberHead = memberNode != null ? memberNode.getCoveredText().toLowerCase() : null;
                if (mentionHead.equals(memberHead)) {
                    exactMatch = true;
                }

                // Head similarity needs both heads to be present in the embedding table.
                if (memberNode == null || !this.words.containsKey(memberHead) || !this.words.containsKey(mentionHead)) {
                    continue;
                }
                double sim = this.words.getSimilarity(mentionHead, memberHead);
                if (sim > maxSim) {
                    maxSim = sim;
                }
            }
        }

        // An identical head string zeroes the embedding feature.
        if (exactMatch) {
            maxSim = 0.0;
        }
        feats.add(new Feature("HEAD_SIMILARITY_WORD2VEC", (Object)maxSim));
        return feats;
    }

    /**
     * Builds a phrase vector for an annotation by summing the embeddings of its
     * covered tokens that are present in the embedding table, then scaling the
     * sum to unit Euclidean length.
     *
     * @param annotation the span whose covered {@code BaseToken}s are summed
     * @return the normalized sum, or an all-zero vector when the sum has zero length
     */
    private double[] getPhraseVec(Annotation annotation) {
        double[] phraseVec = new double[this.words.getDimensionality()];
        for (BaseToken token : JCasUtil.selectCovered(BaseToken.class, annotation)) {
            String word = token.getCoveredText().toLowerCase();
            if (!this.words.containsKey(word)) {
                continue;
            }
            WordVector vec = this.words.getVector(word);
            for (int i = 0; i < phraseVec.length; ++i) {
                phraseVec[i] += vec.getValue(i);
            }
        }

        // The norm must come from the summed vector only; seeding it with a stray
        // per-token component would leave the result shorter than unit length.
        double sumOfSquares = 0.0;
        for (double component : phraseVec) {
            sumOfSquares += component * component;
        }
        double norm = Math.sqrt(sumOfSquares);
        if (norm > 0.0) {
            for (int i = 0; i < phraseVec.length; ++i) {
                phraseVec[i] /= norm;
            }
        }
        return phraseVec;
    }
}

