package ws.palladian.extraction.feature;

import java.util.Iterator;
import java.util.Locale;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang3.Validate;
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.danishStemmer;
import org.tartarus.snowball.ext.dutchStemmer;
import org.tartarus.snowball.ext.englishStemmer;
import org.tartarus.snowball.ext.finnishStemmer;
import org.tartarus.snowball.ext.frenchStemmer;
import org.tartarus.snowball.ext.germanStemmer;
import org.tartarus.snowball.ext.hungarianStemmer;
import org.tartarus.snowball.ext.italianStemmer;
import org.tartarus.snowball.ext.norwegianStemmer;
import org.tartarus.snowball.ext.porterStemmer;
import org.tartarus.snowball.ext.portugueseStemmer;
import org.tartarus.snowball.ext.romanianStemmer;
import org.tartarus.snowball.ext.russianStemmer;
import org.tartarus.snowball.ext.spanishStemmer;
import org.tartarus.snowball.ext.swedishStemmer;
import org.tartarus.snowball.ext.turkishStemmer;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.helper.constants.Language;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.NominalFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/StemmerAnnotator.class */
/**
 * Pipeline processor that stems the token annotations of a {@link TextDocument} with a Snowball
 * stemmer.
 *
 * <p>The tokens are read from the list feature named {@link BaseTokenizer#PROVIDED_FEATURE}.
 * Depending on the configured {@link Mode}, the stem is either attached to each existing token
 * ({@link Mode#ANNOTATE}) or a new token list holding the stemmed values is added to the document
 * ({@link Mode#MODIFY}).
 *
 * <p>All accesses to the wrapped stemmer made by this class happen inside {@link #stem(String)},
 * which synchronizes on the stemmer instance.
 */
public final class StemmerAnnotator extends TextDocumentPipelineProcessor {

    /** Name of the nominal feature carrying the stem; attached to every token in {@link Mode#ANNOTATE}. */
    public static final String STEM = "ws.palladian.features.stem";

    /** Name of the nominal feature carrying the original token value; attached to every token in {@link Mode#MODIFY}. */
    public static final String UNSTEM = "ws.palladian.features.unstem";

    /** Selects how {@link #processDocument(TextDocument)} publishes the stems. */
    private final Mode mode;

    /** Stateful stemmer shared by all calls; only used while holding its monitor. */
    private final SnowballStemmer stemmer;

    /** Determines how the stemming result is written to the processed document. */
    public enum Mode {
        /** Keep the token annotations and add the stem to each one as a {@link #STEM} feature. */
        ANNOTATE,
        /** Build new token annotations whose value is the stem, remembering the original as {@link #UNSTEM}. */
        MODIFY
    }

    /**
     * Creates an annotator that uses the Porter stemmer in {@link Mode#ANNOTATE}.
     */
    public StemmerAnnotator() {
        this(new porterStemmer(), Mode.ANNOTATE);
    }

    /**
     * Creates an annotator around an explicitly supplied stemmer.
     *
     * @param snowballStemmer the stemmer to use, must not be {@code null}
     * @param mode how stems are written to the document, must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     */
    public StemmerAnnotator(SnowballStemmer snowballStemmer, Mode mode) {
        Validate.notNull(snowballStemmer, "stemmer must not be null");
        Validate.notNull(mode, "mode must not be null");
        this.stemmer = snowballStemmer;
        this.mode = mode;
    }

    /**
     * Creates an annotator for the given language in {@link Mode#ANNOTATE}.
     *
     * @param language the language to pick a stemmer for, must not be {@code null}
     * @throws NullPointerException if {@code language} is {@code null}
     * @throws IllegalArgumentException if no stemmer is available for {@code language}
     */
    public StemmerAnnotator(Language language) {
        this(language, Mode.ANNOTATE);
    }

    /**
     * Creates an annotator for the given language and mode.
     *
     * @param language the language to pick a stemmer for, must not be {@code null}
     * @param mode how stems are written to the document, must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     * @throws IllegalArgumentException if no stemmer is available for {@code language}
     */
    public StemmerAnnotator(Language language, Mode mode) {
        Validate.notNull(language, "language must not be null");
        Validate.notNull(mode, "mode must not be null");
        this.stemmer = createStemmer(language);
        this.mode = mode;
    }

    /**
     * Maps a language to a fresh instance of the matching Snowball stemmer.
     *
     * @param language the language to look up
     * @return a new stemmer instance for {@code language}
     * @throws IllegalArgumentException if the language has no entry in the mapping below
     */
    private static SnowballStemmer createStemmer(Language language) {
        switch (language) {
            case DANISH:
                return new danishStemmer();
            case DUTCH:
                return new dutchStemmer();
            case ENGLISH:
                return new englishStemmer();
            case FINNISH:
                return new finnishStemmer();
            case FRENCH:
                return new frenchStemmer();
            case GERMAN:
                return new germanStemmer();
            case HUNGARIAN:
                return new hungarianStemmer();
            case ITALIAN:
                return new italianStemmer();
            case NORWEGIAN:
                return new norwegianStemmer();
            case PORTUGUESE:
                return new portugueseStemmer();
            case ROMANIAN:
                return new romanianStemmer();
            case RUSSIAN:
                return new russianStemmer();
            case SPANISH:
                return new spanishStemmer();
            case SWEDISH:
                return new swedishStemmer();
            case TURKISH:
                return new turkishStemmer();
            default:
                throw new IllegalArgumentException("No stemmer for language " + language + " available.");
        }
    }

    /**
     * Stems the tokens of the given document according to the configured {@link Mode}.
     *
     * @param textDocument the document whose token annotations are stemmed
     * @throws DocumentUnprocessableException declared by the overridden method; not thrown directly here
     * @throws UnsupportedOperationException if the mode is not handled by this implementation
     */
    @Override // ws.palladian.extraction.feature.TextDocumentPipelineProcessor
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        switch (this.mode) {
            case ANNOTATE:
                stemByAnnotating(textDocument);
                return;
            case MODIFY:
                stemByModifying(textDocument);
                return;
            default:
                throw new UnsupportedOperationException("Unimplemented mode '" + this.mode + "'.");
        }
    }

    /**
     * Adds a {@link #STEM} nominal feature holding the stemmed value to every token annotation.
     *
     * @param textDocument the document providing the token list feature
     */
    private void stemByAnnotating(TextDocument textDocument) {
        // NOTE(review): presumably get(...) yields null when the document was not tokenized, which
        // would surface as a NullPointerException here - confirm against TextDocument.
        ListFeature tokens = (ListFeature) textDocument.get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
        Iterator tokenIterator = tokens.iterator();
        while (tokenIterator.hasNext()) {
            PositionAnnotation token = (PositionAnnotation) tokenIterator.next();
            String stemmedValue = stem(token.getValue());
            token.getFeatureVector().add(new NominalFeature(STEM, stemmedValue));
        }
    }

    /**
     * Builds a new token list whose annotations carry the stemmed values and adds it to the
     * document's feature vector under {@link BaseTokenizer#PROVIDED_FEATURE}. Each new annotation
     * keeps the start position of its source token and stores the original value as {@link #UNSTEM}.
     *
     * @param textDocument the document providing the token list feature
     */
    private void stemByModifying(TextDocument textDocument) {
        ListFeature originalTokens = (ListFeature) textDocument.get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
        ListFeature stemmedTokens = new ListFeature(BaseTokenizer.PROVIDED_FEATURE);
        Iterator tokenIterator = originalTokens.iterator();
        while (tokenIterator.hasNext()) {
            PositionAnnotation original = (PositionAnnotation) tokenIterator.next();
            String unstemmedValue = original.getValue();
            PositionAnnotation stemmed = new PositionAnnotation(stem(unstemmedValue), original.getStartPosition());
            stemmed.getFeatureVector().add(new NominalFeature(UNSTEM, unstemmedValue));
            // The annotation itself is the list element; it must not be cast to ListFeature.
            stemmedTokens.add(stemmed);
        }
        // NOTE(review): presumably this supersedes the existing token feature of the same name - confirm.
        textDocument.getFeatureVector().add(stemmedTokens);
    }

    /**
     * Stems a single word and returns the result in lower case.
     *
     * <p>The stemmer is used through a set/stem/get sequence on one shared instance, so the whole
     * sequence runs while holding the stemmer's monitor.
     *
     * @param str the word to stem
     * @return the lower-cased stem of {@code str}
     */
    public String stem(String str) {
        synchronized (this.stemmer) {
            this.stemmer.setCurrent(str);
            this.stemmer.stem();
            // Locale.ROOT keeps the result independent of the JVM default locale; with a Turkish
            // default locale the no-argument toLowerCase() would map 'I' to dotless 'ı'.
            return this.stemmer.getCurrent().toLowerCase(Locale.ROOT);
        }
    }

    /**
     * Describes this processor by its mode and the simple class name of its stemmer.
     *
     * @return a string of the form {@code StemmerAnnotator [mode=..., stemmer=...]}
     */
    @Override // ws.palladian.processing.AbstractPipelineProcessor
    public String toString() {
        return "StemmerAnnotator [mode=" + this.mode + ", stemmer=" + this.stemmer.getClass().getSimpleName() + "]";
    }
}
