package ws.palladian.extraction.pos;

import com.aliasi.hmm.HiddenMarkovModel;
import com.aliasi.hmm.HmmDecoder;
import com.aliasi.tag.Tagging;
import com.aliasi.util.FastCache;
import edu.smu.tspell.wordnet.impl.file.SenseKey;
import edu.stanford.nlp.ling.CoreLabel;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.extraction.pos.filter.TagFilter;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.extraction.token.LingPipeTokenizer;
import ws.palladian.helper.ProgressHelper;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.math.ConfusionMatrix;
import ws.palladian.helper.math.MathHelper;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/pos/LingPipePosTagger.class */
/**
 * Part-of-speech tagger that delegates to a LingPipe {@link HiddenMarkovModel}.
 *
 * <p>The model is read via Java object deserialization, either from a file or from a
 * caller-supplied stream. An optional {@link TagFilter} can be supplied to post-process
 * every tag produced by the model before it is assigned to an annotation.</p>
 *
 * <p>NOTE(security): the model is loaded with {@link ObjectInputStream#readObject()}.
 * Java-native deserialization must only be used on model files from a trusted source.</p>
 */
public final class LingPipePosTagger extends BasePosTagger {

    private static final String TAGGER_NAME = "LingPipe POS-Tagger";

    private static final Logger LOGGER = LoggerFactory.getLogger(LingPipePosTagger.class);

    private static final LingPipeTokenizer TOKENIZER = new LingPipeTokenizer();

    /** Capacity of the emission cache handed to every {@link HmmDecoder} created by this class. */
    private static final int EMISSION_CACHE_SIZE = 100;

    /** Separator between token and gold tag in the evaluation files (e.g. {@code word/tag}). */
    private static final String TOKEN_TAG_SEPARATOR = "/";

    /** The deserialized model used for decoding. */
    private final HiddenMarkovModel model;

    /** Optional post-processor for predicted tags; {@code null} means tags are used as-is. */
    private final TagFilter tagFilter;

    /**
     * Creates a tagger from a serialized model file without any tag filtering.
     *
     * @param file the serialized model file, must not be {@code null}
     * @throws IllegalStateException if the file does not exist or the model cannot be read
     */
    public LingPipePosTagger(File file) {
        this(file, null);
    }

    /**
     * Creates a tagger from a serialized model file.
     *
     * @param file the serialized model file, must not be {@code null}
     * @param tagFilter filter applied to each predicted tag, or {@code null} for no filtering
     * @throws IllegalStateException if the file does not exist or the model cannot be read
     */
    public LingPipePosTagger(File file, TagFilter tagFilter) {
        Validate.notNull(file, "modelFile must not be null");
        FileInputStream fileInputStream = null;
        try {
            fileInputStream = new FileInputStream(file);
            this.model = loadModel(fileInputStream);
        } catch (FileNotFoundException e) {
            throw new IllegalStateException(e);
        } finally {
            // Also reached with a null stream when opening failed; FileHelper.close is
            // presumably null-safe, as the original code relied on the same call.
            FileHelper.close(fileInputStream);
        }
        this.tagFilter = tagFilter;
    }

    /**
     * Creates a tagger from a stream containing a serialized model.
     *
     * <p>The stream is wrapped in an {@link ObjectInputStream} which is closed after
     * reading, so the supplied stream should be considered consumed afterwards.</p>
     *
     * @param inputStream stream providing the serialized model, must not be {@code null}
     * @param tagFilter filter applied to each predicted tag, or {@code null} for no filtering
     * @throws IllegalStateException if the model cannot be read from the stream
     */
    public LingPipePosTagger(InputStream inputStream, TagFilter tagFilter) {
        Validate.notNull(inputStream, "modelStream must not be null");
        this.model = loadModel(inputStream);
        this.tagFilter = tagFilter;
    }

    /**
     * Deserializes a {@link HiddenMarkovModel} from the given stream.
     *
     * @param inputStream the stream to read the serialized model from
     * @return the deserialized model
     * @throws IllegalStateException if reading fails or the model class cannot be resolved;
     *         the underlying exception is attached as the cause
     */
    private static HiddenMarkovModel loadModel(InputStream inputStream) {
        ObjectInputStream objectInputStream = null;
        try {
            objectInputStream = new ObjectInputStream(inputStream);
            return (HiddenMarkovModel) objectInputStream.readObject();
        } catch (IOException e) {
            // Keep the cause so the original stack trace is not lost.
            throw new IllegalStateException("Error while loading model file: " + e.getMessage(), e);
        } catch (ClassNotFoundException e) {
            throw new IllegalStateException("Error while loading model file: " + e.getMessage(), e);
        } finally {
            FileHelper.close(objectInputStream);
        }
    }

    /**
     * Creates a decoder for {@link #model}. Following the LingPipe constructor contract,
     * no linear emission cache is supplied (second argument) and a bounded
     * {@link FastCache} is supplied as the third (log2 emission) cache argument.
     *
     * @return a new decoder instance
     */
    private HmmDecoder createDecoder() {
        return new HmmDecoder(model, null, new FastCache<String, double[]>(EMISSION_CACHE_SIZE));
    }

    /**
     * Tags the given annotations in place. The token list derived from the annotations is
     * decoded as one sequence; the tag at position {@code i} is assigned to the annotation
     * at position {@code i}, after passing through the tag filter if one is configured.
     *
     * @param list the annotations to tag
     */
    @Override
    public void tag(List<PositionAnnotation> list) {
        Tagging<String> tagging = createDecoder().tag(getTokenList(list));
        for (int i = 0; i < tagging.size(); i++) {
            String predictedTag = tagging.tag(i);
            assignTag(list.get(i), tagFilter == null ? Arrays.asList(predictedTag) : tagFilter.filter(predictedTag));
        }
    }

    /**
     * Evaluates the tagger against gold-standard files and logs accuracy and a confusion
     * matrix. Every file returned by {@code FileHelper.getFiles(str)} is split on
     * whitespace into {@code token/tag} entries; entries without a separator are skipped.
     * Each token is decoded on its own (as a one-element sequence) and the prediction is
     * compared with the normalized, lower-cased gold tag.
     *
     * @param str path handed to {@code FileHelper.getFiles} to locate the evaluation files
     * @param str2 currently unused; kept for interface compatibility
     */
    public void evaluate(String str, String str2) {
        StopWatch stopWatch = new StopWatch();
        LOGGER.info("start evaluating the tagger");

        ConfusionMatrix confusionMatrix = new ConfusionMatrix();
        HmmDecoder decoder = createDecoder();
        File[] testFiles = FileHelper.getFiles(str);
        int correct = 0;
        int total = 0;

        for (int fileIndex = 0; fileIndex < testFiles.length; fileIndex++) {
            String content = FileHelper.readFileToString(testFiles[fileIndex]);
            for (String taggedToken : content.split("\\s")) {
                if (taggedToken.isEmpty()) {
                    continue;
                }
                // NOTE(review): a token that itself contains the separator (e.g. "1/2/cd")
                // is split at the first occurrence, so parts[1] is not the gold tag then.
                String[] parts = taggedToken.split(TOKEN_TAG_SEPARATOR);
                if (parts.length < 2) {
                    continue;
                }
                String predictedTag = decoder.tag(Arrays.asList(parts[0])).tags().get(0);
                String goldTag = normalizeTag(parts[1]).toLowerCase();
                confusionMatrix.add(goldTag, predictedTag);
                if (predictedTag.equals(goldTag)) {
                    correct++;
                }
                total++;
            }
            // Progress counter is 1-based.
            ProgressHelper.printProgress(fileIndex + 1, testFiles.length, 1.0d);
        }

        // Guard against 0/0 (NaN) when no evaluable token was found.
        double accuracy = total == 0 ? 0.0d : (100.0d * correct) / total;

        LOGGER.info("all files read in {}", stopWatch.getElapsedTimeString());
        LOGGER.info("Accuracy: {}%", MathHelper.round(accuracy, 2));
        LOGGER.info("{}{}", FileHelper.NEWLINE_CHARACTER, confusionMatrix);
        LOGGER.info("finished evaluating the tagger in {}", stopWatch.getElapsedTimeString());
    }

    /**
     * @return the human-readable name of this tagger
     */
    @Override
    public String getName() {
        return TAGGER_NAME;
    }

    /**
     * @return the shared tokenizer instance used by this tagger
     */
    @Override
    protected BaseTokenizer getTokenizer() {
        return TOKENIZER;
    }

    /**
     * Entry point retained for compatibility; intentionally does nothing.
     *
     * @param strArr command line arguments, ignored
     */
    public static void main(String[] strArr) {
    }
}
