/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.postagger;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;

/**
 * Collection reader for part-of-speech data in which every line is one sentence made of
 * space-separated {@code WORD_TAG} tokens.
 *
 * <p>Each call to {@link #getNext(CAS)} consumes one line of the configured file and fills the
 * CAS with one {@link BaseToken} per token, a single {@link Sentence} spanning all of them, and a
 * document text built from the words joined by single spaces.
 *
 * <p>Configuration parameters:
 * <ul>
 *   <li>{@value #POS_DATA_FILE_PARAM} - path of the data file to read (required).</li>
 *   <li>{@value #LOAD_WORDS_ONLY_PARAM} - when {@code true}, tokens are created without their
 *       part-of-speech tag; treated as {@code false} when absent.</li>
 * </ul>
 */
@PipeBitInfo(name="OpenNLP POS Reader", description="Reads in part-of-speech training/test data in the OpenNLP format.", role=PipeBitInfo.Role.READER, products={PipeBitInfo.TypeProduct.BASE_TOKEN, PipeBitInfo.TypeProduct.SENTENCE})
public class OpenNLPPOSCollectionReader
extends CollectionReader_ImplBase {
    public static final String POS_DATA_FILE_PARAM = "PosDataFile";
    public static final String LOAD_WORDS_ONLY_PARAM = "LoadWordsOnly";

    /** Reader over the data file; stays {@code null} until {@link #initialize()} succeeds. */
    BufferedReader input;

    /** The line read ahead by {@link #hasNext()} and not yet consumed; {@code null} if none. */
    String line = null;

    /** When {@code true}, part-of-speech tags are not copied onto the created tokens. */
    boolean loadWordsOnly;

    /**
     * Reads the configuration parameters and opens the data file.
     *
     * @throws ResourceInitializationException if the {@value #POS_DATA_FILE_PARAM} parameter is
     *         not set or the file it names cannot be opened
     */
    public void initialize() throws ResourceInitializationException {
        String posDataFile = (String)this.getConfigParameterValue(POS_DATA_FILE_PARAM);
        if (posDataFile == null) {
            // Report the missing parameter by name instead of failing with a bare
            // NullPointerException from the FileReader constructor.
            throw new ResourceInitializationException((Throwable)new IllegalArgumentException(
                    "Required configuration parameter " + POS_DATA_FILE_PARAM + " is not set."));
        }
        // Resolve every parameter before opening the file so that a bad parameter value cannot
        // leave an open reader behind.
        Boolean paramValue = (Boolean)this.getConfigParameterValue(LOAD_WORDS_ONLY_PARAM);
        this.loadWordsOnly = paramValue != null && paramValue;
        try {
            this.input = new BufferedReader(new FileReader(posDataFile));
        }
        catch (FileNotFoundException fnfe) {
            throw new ResourceInitializationException((Throwable)fnfe);
        }
    }

    /**
     * Consumes the next line of the data file and populates {@code cas} from it.
     *
     * <p>Token offsets refer to the generated document text (words separated by single spaces),
     * not to the raw input line. Empty tokens, which arise from leading or repeated spaces and
     * from blank lines, are skipped. If no line is left, the CAS is not modified.
     *
     * @param cas the CAS to fill
     * @throws IOException if reading the data file fails
     * @throws CollectionException if a token has no usable {@code _} separator, or if the JCas
     *         view cannot be obtained from {@code cas}
     */
    public void getNext(CAS cas) throws IOException, CollectionException {
        try {
            if (this.hasNext()) {
                JCas jCas = cas.getJCas();
                String[] tokens = this.line.split(" ");
                int wordStart = 0;
                int wordEnd = 0;
                int wordNumber = 0;
                StringBuilder documentText = new StringBuilder();
                for (String token : tokens) {
                    if (token.isEmpty()) {
                        // "".split(" ") yields [""], and leading/double spaces yield "" entries;
                        // such an entry carries no word, so it is ignored.
                        continue;
                    }
                    int split = OpenNLPPOSCollectionReader.findTagSeparator(token);
                    if (split == -1) {
                        // Drop the offending line so the next call moves on to the following one.
                        this.line = null;
                        throw new CollectionException("There is a problem in your training data: " + token + " does not conform to the format WORD_TAG.", null);
                    }
                    String word = token.substring(0, split);
                    wordEnd = wordStart + word.length();
                    BaseToken baseToken = new BaseToken(jCas, wordStart, wordEnd);
                    if (!this.loadWordsOnly) {
                        baseToken.setPartOfSpeech(token.substring(split + 1));
                    }
                    baseToken.setTokenNumber(wordNumber++);
                    baseToken.addToIndexes();
                    documentText.append(word).append(' ');
                    // Account for the single space appended after the word.
                    wordStart = wordEnd + 1;
                }
                // wordEnd is the end of the last word, so the trailing space is not covered.
                Sentence sentence = new Sentence(jCas, 0, wordEnd);
                sentence.setSentenceNumber(0);
                sentence.addToIndexes();
                jCas.setDocumentText(documentText.toString());
            }
        }
        catch (CASException ce) {
            throw new CollectionException((Throwable)ce);
        }
        this.line = null;
    }

    /**
     * Closes the underlying data file, if it was opened.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        if (this.input != null) {
            this.input.close();
        }
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return an empty array
     */
    public Progress[] getProgress() {
        return new Progress[0];
    }

    /**
     * Reports whether another line is available, reading one line ahead if none is buffered.
     *
     * @return {@code true} if a line is buffered for the next {@link #getNext(CAS)} call
     * @throws IOException if reading the data file fails
     */
    public boolean hasNext() throws IOException, CollectionException {
        if (this.line == null) {
            this.line = this.input.readLine();
        }
        return this.line != null;
    }

    /**
     * Finds the index of the {@code _} that separates the word from its tag in a non-empty token.
     *
     * <p>The last underscore is used. If that underscore is the final character of the token, the
     * search is repeated on the token without its final character.
     *
     * @param token a non-empty {@code WORD_TAG} token
     * @return the index of the separating underscore, or -1 if there is none
     */
    private static int findTagSeparator(String token) {
        int split = token.lastIndexOf("_");
        if (split == token.length() - 1) {
            // NOTE(review): presumably covers tokens whose tag itself ends in '_' - confirm
            // against the data format.
            split = token.substring(0, token.length() - 1).lastIndexOf("_");
        }
        return split;
    }
}

