package ws.palladian.extraction.token;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
import org.apache.commons.lang3.Validate;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.ImmutableAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/token/OpenNlpTokenizer.class */
/**
 * Tokenizer that delegates to an Apache OpenNLP {@link opennlp.tools.tokenize.Tokenizer} and exposes the
 * resulting tokens as {@link Annotation}s.
 */
public final class OpenNlpTokenizer extends BaseTokenizer {
    /** The OpenNLP tokenizer all tokenization requests are delegated to; never {@code null}. */
    private final opennlp.tools.tokenize.Tokenizer tokenizer;

    /**
     * Creates a tokenizer backed by {@link SimpleTokenizer#INSTANCE}, which requires no model file.
     */
    public OpenNlpTokenizer() {
        this(SimpleTokenizer.INSTANCE);
    }

    /**
     * Creates a tokenizer backed by the given OpenNLP tokenizer.
     *
     * @param tokenizer the OpenNLP tokenizer to delegate to, not {@code null}
     */
    public OpenNlpTokenizer(opennlp.tools.tokenize.Tokenizer tokenizer) {
        Validate.notNull(tokenizer, "tokenizer must not be null");
        this.tokenizer = tokenizer;
    }

    /**
     * Creates a tokenizer backed by a {@link TokenizerME} whose model is read from the given file.
     *
     * @param file the file containing the OpenNLP tokenizer model, not {@code null}
     * @throws IllegalStateException if the model cannot be read from the file; the underlying
     *         {@link IOException} is attached as the cause
     */
    public OpenNlpTokenizer(File file) {
        Validate.notNull(file, "modelFile must not be null");
        this.tokenizer = new TokenizerME(loadModel(file));
    }

    /**
     * Reads a {@link TokenizerModel} from the given file and closes the underlying stream afterwards,
     * regardless of whether reading succeeded.
     */
    private static TokenizerModel loadModel(File file) {
        FileInputStream modelStream = null;
        try {
            modelStream = new FileInputStream(file);
            return new TokenizerModel(modelStream);
        } catch (IOException e) {
            // Keep the original exception as cause so the stack trace of the I/O failure is not lost.
            throw new IllegalStateException("Error initializing OpenNLP Tokenizer from \"" + file.getAbsolutePath() + "\": " + e.getMessage(), e);
        } finally {
            // modelStream is still null when opening the file itself failed.
            // NOTE(review): assumes FileHelper.close tolerates null, as the original code also passed null here — confirm.
            FileHelper.close(modelStream);
        }
    }

    /**
     * Tokenizes the given text and returns one annotation per token, in the order the underlying tokenizer
     * reports them.
     *
     * <p>Each annotation is created from the token's start offset within {@code str}, the token text, and
     * {@link BaseTokenizer#PROVIDED_FEATURE}.</p>
     *
     * @param str the text to tokenize
     * @return a new list with one annotation per token span
     */
    @Override // ws.palladian.processing.Tagger
    public List<Annotation> getAnnotations(String str) {
        Span[] spans = this.tokenizer.tokenizePos(str);
        List<Annotation> annotations = new ArrayList<Annotation>(spans.length);
        for (Span span : spans) {
            int start = span.getStart();
            String tokenText = str.substring(start, span.getEnd());
            annotations.add(new ImmutableAnnotation(start, tokenText, BaseTokenizer.PROVIDED_FEATURE));
        }
        return annotations;
    }
}
