package ws.palladian.extraction.token;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.Validate;
import ws.palladian.extraction.feature.TextDocumentPipelineProcessor;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.Tagger;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/token/BaseTokenizer.class */
/**
 * Base class for tokenizers that run as a text document pipeline processor.
 *
 * <p>Processing a document takes the annotations produced by {@code getAnnotations(String)} for the
 * document's content, converts each one into a {@link PositionAnnotation} and stores the resulting
 * list in the document's feature vector under {@link #PROVIDED_FEATURE}. {@code getAnnotations} is
 * not defined in this class; presumably it is declared by {@link Tagger} and implemented by
 * subclasses.
 */
public abstract class BaseTokenizer extends TextDocumentPipelineProcessor implements Tagger {
    /** Name of the list feature under which the token annotations of a document are stored. */
    public static final String PROVIDED_FEATURE = "ws.palladian.features.tokens";

    /**
     * Looks up the token annotations stored for the given document.
     *
     * @param textDocument the document to read the token feature from, must not be {@code null}
     * @return whatever the document holds under {@link #PROVIDED_FEATURE}
     */
    public static ListFeature<PositionAnnotation> getTokenAnnotations(TextDocument textDocument) {
        Validate.notNull(textDocument, "document must not be null");
        // The lookup is keyed by the raw ListFeature class, so the element type can only be restored
        // with an unchecked cast; processDocument only ever stores PositionAnnotation elements under
        // this feature name.
        @SuppressWarnings("unchecked")
        ListFeature<PositionAnnotation> tokenAnnotations =
                (ListFeature<PositionAnnotation>) textDocument.get(ListFeature.class, PROVIDED_FEATURE);
        return tokenAnnotations;
    }

    /**
     * Collects the values of all token annotations of the given document, in iteration order.
     *
     * @param textDocument the document to read the tokens from, must not be {@code null}
     * @return a new list with one value per token annotation; empty when no token feature is found
     */
    public static List<String> getTokens(TextDocument textDocument) {
        Validate.notNull(textDocument, "document must not be null");
        List<String> tokens = new ArrayList<String>();
        ListFeature<PositionAnnotation> tokenAnnotations = getTokenAnnotations(textDocument);
        // NOTE(review): the lookup presumably yields null for a document that has not been
        // tokenized yet -- confirm against TextDocument#get. Guarding here gives callers an empty
        // list instead of a NullPointerException in that case.
        if (tokenAnnotations == null) {
            return tokens;
        }
        for (PositionAnnotation tokenAnnotation : tokenAnnotations) {
            tokens.add(tokenAnnotation.getValue());
        }
        return tokens;
    }

    /**
     * Tokenizes the content of the document and adds the tokens to its feature vector as a list
     * feature named {@link #PROVIDED_FEATURE}.
     *
     * @param textDocument the document to tokenize
     * @throws DocumentUnprocessableException declared by the overridden method; not thrown directly here
     */
    @Override // ws.palladian.extraction.feature.TextDocumentPipelineProcessor
    public final void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        String content = textDocument.getContent();
        ListFeature<PositionAnnotation> tokenFeature = new ListFeature<PositionAnnotation>(PROVIDED_FEATURE);
        for (Annotation annotation : getAnnotations(content)) {
            // Only the value and the start position of each annotation are carried over.
            tokenFeature.add(new PositionAnnotation(annotation.getValue(), annotation.getStartPosition()));
        }
        textDocument.getFeatureVector().add(tokenFeature);
    }
}
