package ws.palladian.extraction.feature;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/TermCorpusBuilder.class */
/**
 * Pipeline processor that collects the distinct, lower-cased token values of each processed
 * document and hands them to a {@link TermCorpus} via
 * {@link TermCorpus#addTermsFromDocument}.
 *
 * <p>The tokens are read from the document's list feature stored under
 * {@link BaseTokenizer#PROVIDED_FEATURE}, so that feature must already be present on the
 * document when this processor runs.</p>
 */
public class TermCorpusBuilder extends TextDocumentPipelineProcessor {
    /** Corpus that receives the terms of every processed document. */
    private final TermCorpus termCorpus;

    /**
     * Creates a builder that writes into a newly created {@link TermCorpus}.
     */
    public TermCorpusBuilder() {
        this(new TermCorpus());
    }

    /**
     * Creates a builder that writes into the supplied corpus.
     *
     * @param termCorpus the corpus to add terms to, must not be {@code null}
     * @throws NullPointerException if {@code termCorpus} is {@code null}
     */
    public TermCorpusBuilder(TermCorpus termCorpus) {
        // Fail fast here rather than with an anonymous NPE on the first processed document.
        if (termCorpus == null) {
            throw new NullPointerException("termCorpus must not be null");
        }
        this.termCorpus = termCorpus;
    }

    /**
     * Adds the distinct, lower-cased token values of the given document to the term corpus.
     *
     * @param textDocument the document whose token annotations are read
     * @throws DocumentUnprocessableException if the document carries no list feature under
     *         {@link BaseTokenizer#PROVIDED_FEATURE}
     */
    @Override
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        List<?> annotations = (List<?>) textDocument.getFeatureVector()
                .get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
        if (annotations == null) {
            throw new DocumentUnprocessableException(
                    "The required feature \"" + BaseTokenizer.PROVIDED_FEATURE + "\" is missing.");
        }

        // A set is used so that each term is reported at most once per document.
        HashSet<String> terms = new HashSet<String>();
        for (Object annotation : annotations) {
            // NOTE(review): toLowerCase() uses the JVM default locale; kept as-is so that
            // stored terms keep matching however callers normalise their lookups — confirm.
            terms.add(((PositionAnnotation) annotation).getValue().toLowerCase());
        }
        this.termCorpus.addTermsFromDocument(terms);
    }

    /**
     * Returns the corpus this builder adds terms to.
     *
     * @return the term corpus passed to, or created by, the constructor
     */
    public TermCorpus getTermCorpus() {
        return this.termCorpus;
    }
}
