package ws.palladian.extraction.pos;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.tagger.NerHelper;
import ws.palladian.extraction.feature.TextDocumentPipelineProcessor;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.extraction.token.RegExTokenizer;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.Tagger;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.ImmutableAnnotation;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.NominalFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/pos/BasePosTagger.class */
/**
 * Base class for part-of-speech taggers working on the token annotations that a
 * {@link BaseTokenizer} stores in a {@link TextDocument}.
 *
 * <p>Subclasses supply the actual tagging logic in {@link #tag(List)} and a name in
 * {@link #getName()}. They may override {@link #getTokenizer()} to use a tokenizer other than
 * the default {@link RegExTokenizer}.</p>
 */
public abstract class BasePosTagger extends TextDocumentPipelineProcessor implements Tagger {
    /** Name of the {@link NominalFeature} under which the POS tag is stored on each token. */
    public static final String PROVIDED_FEATURE = "ws.palladian.features.pos";

    /** Tokenizer returned by {@link #getTokenizer()} unless a subclass overrides it. */
    private static final BaseTokenizer DEFAULT_TOKENIZER = new RegExTokenizer();

    /** Matches the first hyphen on a line and everything after it; compiled once for {@link #normalizeTag(String)}. */
    private static final Pattern TAG_SUFFIX = Pattern.compile("-.*");

    /**
     * Tokenizes and tags the given text and returns one annotation per token.
     *
     * @param str the text to tag
     * @return a new list with one {@link ImmutableAnnotation} per token, built from the token's
     *         start position, its value and the value of its {@link #PROVIDED_FEATURE} feature
     * @throws IllegalArgumentException if tokenizing or tagging throws a
     *         {@link DocumentUnprocessableException}, or if no token list is present afterwards
     * @throws IllegalStateException if a token carries no {@link #PROVIDED_FEATURE} feature after
     *         tagging
     */
    @Override // ws.palladian.processing.Tagger
    public List<Annotation> getAnnotations(String str) {
        TextDocument document = new TextDocument(str);
        try {
            getTokenizer().processDocument(document);
            processDocument(document);
            @SuppressWarnings("unchecked") // the feature is looked up by its raw class; elements are token annotations
            List<PositionAnnotation> tokens = (List<PositionAnnotation>) document.getFeatureVector().get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
            requireTokenized(tokens);
            List<Annotation> annotations = new ArrayList<Annotation>(tokens.size());
            for (PositionAnnotation token : tokens) {
                NominalFeature posTag = (NominalFeature) token.getFeatureVector().get(NominalFeature.class, PROVIDED_FEATURE);
                if (posTag == null) {
                    // Fail with a descriptive message instead of an anonymous NullPointerException.
                    throw new IllegalStateException("No POS tag was assigned to token \"" + token.getValue()
                            + "\" at position " + token.getStartPosition());
                }
                annotations.add(new ImmutableAnnotation(token.getStartPosition(), token.getValue(), posTag.getValue()));
            }
            return annotations;
        } catch (DocumentUnprocessableException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Tags the given text and renders it through {@link NerHelper#tag} using
     * {@link TaggingFormat#SLASHES}.
     *
     * @param str the text to tag
     * @return the result of {@code NerHelper.tag} for the text and its annotations
     * @throws IllegalArgumentException see {@link #getAnnotations(String)}
     */
    public String getTaggedString(String str) {
        return NerHelper.tag(str, getAnnotations(str), TaggingFormat.SLASHES);
    }

    /**
     * Returns the tokenizer that {@link #getAnnotations(String)} runs before tagging.
     *
     * @return the shared default {@link RegExTokenizer} instance
     */
    protected BaseTokenizer getTokenizer() {
        return DEFAULT_TOKENIZER;
    }

    /**
     * Reads the token list stored under {@link BaseTokenizer#PROVIDED_FEATURE} and passes it to
     * {@link #tag(List)}.
     *
     * @param textDocument the document to tag; it must already have been tokenized
     * @throws DocumentUnprocessableException if the document carries no token list
     */
    @Override // ws.palladian.extraction.feature.TextDocumentPipelineProcessor
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        @SuppressWarnings("unchecked") // the feature is looked up by its raw class; elements are token annotations
        List<PositionAnnotation> tokens = (List<PositionAnnotation>) textDocument.get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
        requireTokenized(tokens);
        tag(tokens);
    }

    /**
     * Assigns POS tags to the given tokens. {@link #getAnnotations(String)} expects every token
     * to carry a {@link NominalFeature} named {@link #PROVIDED_FEATURE} afterwards.
     *
     * @param list the token annotations of the document being processed
     */
    protected abstract void tag(List<PositionAnnotation> list);

    /**
     * Collects the values of the given token annotations, in order.
     *
     * <p>Declared public here; a decompiler note in the source states the access modifier was
     * changed from protected.</p>
     *
     * @param list the token annotations
     * @return a new list holding {@code getValue()} of each annotation
     */
    public static List<String> getTokenList(List<PositionAnnotation> list) {
        List<String> tokenValues = new ArrayList<String>(list.size());
        for (PositionAnnotation token : list) {
            tokenValues.add(token.getValue());
        }
        return tokenValues;
    }

    /**
     * Strips the first hyphen and everything following it on the same line, e.g.
     * {@code "NN-TL"} becomes {@code "NN"}. A tag that starts with a hyphen therefore
     * normalizes to the empty string.
     *
     * <p>Declared public here; a decompiler note in the source states the access modifier was
     * changed from protected.</p>
     *
     * @param str the tag to normalize
     * @return the tag without its hyphenated suffix
     */
    public static String normalizeTag(String str) {
        return TAG_SUFFIX.matcher(str).replaceAll("");
    }

    /**
     * Adds one {@link NominalFeature} named {@link #PROVIDED_FEATURE} to the annotation for each
     * entry of the given list, using the upper-cased entry as value.
     *
     * <p>Upper-casing uses {@link Locale#ROOT} so the resulting tags do not depend on the JVM's
     * default locale. NOTE(review): whether the feature vector keeps several features of the same
     * name or replaces earlier ones is not visible here — confirm before passing more than one
     * tag.</p>
     *
     * <p>Declared public here; a decompiler note in the source states the access modifier was
     * changed from protected.</p>
     *
     * @param positionAnnotation the token annotation to receive the tag(s)
     * @param list the tag(s) to assign
     */
    public static void assignTag(PositionAnnotation positionAnnotation, List<String> list) {
        for (String tag : list) {
            positionAnnotation.getFeatureVector().add(new NominalFeature(PROVIDED_FEATURE, tag.toUpperCase(Locale.ROOT)));
        }
    }

    /**
     * Returns the name of this tagger.
     *
     * @return the tagger's name
     */
    public abstract String getName();

    /** Rejects a missing token list with a descriptive exception instead of a later NullPointerException. */
    private static void requireTokenized(List<PositionAnnotation> tokens) throws DocumentUnprocessableException {
        if (tokens == null) {
            throw new DocumentUnprocessableException("Document is not tokenized: feature \""
                    + BaseTokenizer.PROVIDED_FEATURE + "\" is missing; run a tokenizer before POS tagging.");
        }
    }
}
