package ws.palladian.extraction.phrase;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import ws.palladian.extraction.pos.OpenNlpPosTagger;
import ws.palladian.helper.Cache;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.ImmutableAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/phrase/OpenNlpPhraseChunker.class */
/**
 * {@link PhraseChunker} implementation backed by an OpenNLP {@link ChunkerME}.
 *
 * <p>The text is first tagged with an {@link OpenNlpPosTagger}; the resulting tokens and their
 * tags are then handed to the OpenNLP chunker, and the per-token chunk tags it returns are merged
 * into one {@link Annotation} per phrase.</p>
 *
 * <p>NOTE(review): the loaded {@link ChunkerME} instance is stored in the shared {@link Cache} and
 * may therefore be reused by several chunker instances. Whether that is safe under concurrent use
 * depends on the OpenNLP version -- confirm before using this class from multiple threads.</p>
 */
public final class OpenNlpPhraseChunker implements PhraseChunker {
    private static final String CHUNKER_NAME = "OpenNLP Phrase Chunker";

    /** Prefix of a chunk tag that opens a new phrase, e.g. {@code B-NP}. */
    private static final String BEGIN_PREFIX = "B-";

    /** Prefix of a chunk tag that continues the current phrase, e.g. {@code I-NP}. */
    private static final String INSIDE_PREFIX = "I-";

    private final ChunkerME model;
    private final OpenNlpPosTagger tagger;

    /**
     * Creates a new phrase chunker.
     *
     * @param file  the OpenNLP chunker model file
     * @param file2 the model file handed to the {@link OpenNlpPosTagger}
     * @throws IllegalStateException if the chunker model file cannot be read
     */
    public OpenNlpPhraseChunker(File file, File file2) {
        this.model = loadModel(file);
        this.tagger = new OpenNlpPosTagger(file2);
    }

    /**
     * Tags the given text and groups the tagged tokens into phrases.
     *
     * @param str the text to chunk
     * @return one annotation per detected phrase, in text order
     */
    @Override // ws.palladian.extraction.phrase.PhraseChunker
    public List<Annotation> chunk(String str) {
        return chunk(str, this.tagger.getAnnotations(str));
    }

    /**
     * @return the display name of this chunker
     */
    @Override // ws.palladian.extraction.phrase.PhraseChunker
    public String getName() {
        return CHUNKER_NAME;
    }

    /**
     * Runs the OpenNLP chunker over already tagged tokens and merges its per-token chunk tags
     * into phrase annotations.
     *
     * <p>A tag starting with {@code B-} opens a new phrase, a tag starting with {@code I-}
     * extends the current one; tokens with any other tag are not added to a phrase. The value of
     * a phrase is its tokens joined by single spaces, its tag is the chunk type of the last
     * {@code B-}/{@code I-} tag seen, and its start position is the start position of its first
     * token. No annotation is created when no phrase text has been collected.</p>
     *
     * @param str  the text the tokens were taken from (kept for signature compatibility; offsets
     *             are taken from the token annotations)
     * @param list the tagged tokens of {@code str}, in text order
     * @return the phrase annotations, in text order
     */
    private List<Annotation> chunk(String str, List<Annotation> list) {
        int tokenCount = list.size();
        String[] tokens = new String[tokenCount];
        String[] posTags = new String[tokenCount];
        for (int i = 0; i < tokenCount; i++) {
            Annotation token = list.get(i);
            tokens[i] = token.getValue();
            posTags[i] = token.getTag();
        }

        String[] chunkTags = this.model.chunk(tokens, posTags);

        List<Annotation> phrases = CollectionHelper.newArrayList();
        StringBuilder phrase = new StringBuilder();
        String phraseTag = "";
        int phraseStart = -1;

        for (int i = 0; i < chunkTags.length; i++) {
            String chunkTag = chunkTags[i];
            if (chunkTag.startsWith(BEGIN_PREFIX)) {
                // A new phrase begins: emit the one collected so far, then start over.
                addPhrase(phrases, phraseStart, phrase, phraseTag);
                phrase.setLength(0);
                phrase.append(tokens[i]);
                phraseTag = chunkTag.substring(BEGIN_PREFIX.length());
                phraseStart = list.get(i).getStartPosition();
            } else if (chunkTag.startsWith(INSIDE_PREFIX)) {
                if (phrase.length() == 0) {
                    // Continuation tag without an opening tag: treat this token as the start.
                    phraseStart = list.get(i).getStartPosition();
                }
                // Same joining as before: a blank precedes every continued token.
                phrase.append(' ').append(tokens[i]);
                phraseTag = chunkTag.substring(INSIDE_PREFIX.length());
            }
        }
        // Emit the phrase that was still open when the input ended.
        addPhrase(phrases, phraseStart, phrase, phraseTag);
        return phrases;
    }

    /**
     * Appends a phrase annotation to {@code target} unless no phrase text has been collected.
     */
    private static void addPhrase(List<Annotation> target, int start, StringBuilder phrase, String tag) {
        if (phrase.length() > 0) {
            target.add(new ImmutableAnnotation(start, phrase.toString(), tag));
        }
    }

    /**
     * Returns the chunker for the given model file, loading it on first use.
     *
     * <p>The chunker is looked up in, and stored into, the shared {@link Cache} under the
     * absolute path of the model file.</p>
     *
     * @param file the OpenNLP chunker model file
     * @return the chunker built from the model
     * @throws IllegalStateException if the model file cannot be read
     */
    private ChunkerME loadModel(File file) {
        String absolutePath = file.getAbsolutePath();
        ChunkerME chunkerME = (ChunkerME) Cache.getInstance().getDataObject(absolutePath);
        if (chunkerME != null) {
            return chunkerME;
        }
        FileInputStream modelStream = null;
        try {
            modelStream = new FileInputStream(absolutePath);
            chunkerME = new ChunkerME(new ChunkerModel(modelStream));
            Cache.getInstance().putDataObject(absolutePath, chunkerME);
            return chunkerME;
        } catch (IOException e) {
            // Keep the original exception as cause so the stack trace is not lost.
            throw new IllegalStateException("Error while loading model file \"" + absolutePath + "\": " + e.getMessage(), e);
        } finally {
            if (modelStream != null) {
                try {
                    modelStream.close();
                } catch (IOException ignored) {
                    // The stream was only read from; a failure while closing it cannot
                    // affect the model that has already been loaded.
                }
            }
        }
    }
}
