package ws.palladian.extraction.feature;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang3.Validate;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/StopTokenRemover.class */
public final class StopTokenRemover extends AbstractTokenRemover {
    private final Set<String> stopwords;

    public StopTokenRemover(Language language) {
        Validate.notNull(language, "languae must not be null", new Object[0]);
        switch (language) {
            case ENGLISH:
                this.stopwords = loadStopwordsResource("/stopwords_en.txt");
                return;
            case GERMAN:
                this.stopwords = loadStopwordsResource("/stopwords_de.txt");
                return;
            default:
                this.stopwords = Collections.emptySet();
                return;
        }
    }

    public StopTokenRemover(File file) {
        Validate.notNull(file, "file must not be null", new Object[0]);
        try {
            this.stopwords = loadStopwords(new FileInputStream(file));
        } catch (FileNotFoundException e) {
            throw new IllegalArgumentException("File \"" + file + "\" not found.");
        }
    }

    private Set<String> loadStopwordsResource(String str) {
        InputStream resourceAsStream = StopTokenRemover.class.getResourceAsStream(str);
        if (resourceAsStream == null) {
            throw new IllegalStateException("Resource \"" + str + "\" not found.");
        }
        try {
            Set<String> loadStopwords = loadStopwords(resourceAsStream);
            FileHelper.close(resourceAsStream);
            return loadStopwords;
        } catch (Throwable th) {
            FileHelper.close(resourceAsStream);
            throw th;
        }
    }

    private Set<String> loadStopwords(InputStream inputStream) {
        final HashSet hashSet = new HashSet();
        FileHelper.performActionOnEveryLine(inputStream, new LineAction() { // from class: ws.palladian.extraction.feature.StopTokenRemover.1
            @Override // ws.palladian.helper.io.LineAction
            public void performAction(String str, int i) {
                String trim = str.trim();
                if (trim.startsWith("#") || trim.isEmpty()) {
                    return;
                }
                hashSet.add(str.toLowerCase());
            }
        });
        return hashSet;
    }

    public boolean isStopword(String str) {
        return this.stopwords.contains(str.toLowerCase());
    }

    @Override // ws.palladian.extraction.feature.AbstractTokenRemover
    protected boolean remove(PositionAnnotation positionAnnotation) {
        return isStopword(positionAnnotation.getValue());
    }

    @Override // ws.palladian.processing.AbstractPipelineProcessor
    public String toString() {
        return "StopTokenRemover [#stopwords=" + this.stopwords.size() + DefaultExpressionEngine.DEFAULT_ATTRIBUTE_END;
    }
}
