package ws.palladian.extraction.patterns;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.extraction.feature.TextDocumentPipelineProcessor;
import ws.palladian.extraction.pos.BasePosTagger;
import ws.palladian.extraction.sentence.AbstractSentenceDetector;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.NominalFeature;
import ws.palladian.processing.features.PositionAnnotation;
import ws.palladian.processing.features.PositionAnnotationFactory;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/patterns/SequentialPatternAnnotator.class */
/**
 * Pipeline processor that turns every sentence of a document into a sequence of labels and hands
 * that sequence to a {@link SpanExtractionStrategy} to extract sequential patterns.
 *
 * <p>For each sentence the text is scanned from the sentence start to the sentence end. Where a
 * marked keyword begins, the keyword annotation's value becomes the next label; otherwise, where a
 * token begins, the token's {@link NominalFeature} registered under
 * {@link BasePosTagger#PROVIDED_FEATURE} (presumably its part-of-speech tag) becomes the next
 * label. The patterns extracted from all sentences are collected in one {@link ListFeature} named
 * {@link #PROVIDED_FEATURE}, which is added to the document.
 *
 * <p>The document must already carry the list features {@link BaseTokenizer#PROVIDED_FEATURE} and
 * {@link AbstractSentenceDetector#PROVIDED_FEATURE}, and every token must carry the feature named
 * {@link BasePosTagger#PROVIDED_FEATURE}; these are dereferenced without a null check.
 */
public final class SequentialPatternAnnotator extends TextDocumentPipelineProcessor {
    private static final Logger LOGGER = LoggerFactory.getLogger(SequentialPatternAnnotator.class);

    /** Name of the list feature this processor adds to each processed document. */
    public static final String PROVIDED_FEATURE = "lsp";

    /** Keywords that are kept verbatim in a pattern instead of being replaced by the token feature. */
    private final Set<String> keywords;
    /** Upper pattern size bound, passed through to the extraction strategy. */
    private final Integer maxSequentialPatternSize;
    /** Lower pattern size bound, passed through to the extraction strategy. */
    private final Integer minSequentialPatternSize;
    /** Strategy that derives the patterns from a sentence's label sequence. */
    private final SpanExtractionStrategy extractionStrategy;

    /**
     * Creates a new annotator.
     *
     * @param keywords the keywords to mark; each one is compiled as a regular expression after
     *        lower-casing (see {@code markKeywords}); duplicates are collapsed
     * @param minSequentialPatternSize lower size bound handed to the strategy, at least 1
     * @param maxSequentialPatternSize upper size bound handed to the strategy, at least
     *        {@code minSequentialPatternSize}
     * @param extractionStrategy the strategy used to extract patterns from a label sequence
     * @throws NullPointerException if any argument is {@code null}
     * @throws IllegalArgumentException if the size bounds are out of range
     */
    public SequentialPatternAnnotator(String[] keywords, Integer minSequentialPatternSize,
            Integer maxSequentialPatternSize, SpanExtractionStrategy extractionStrategy) {
        Validate.notNull(keywords, "keywords must not be null");
        Validate.notNull(minSequentialPatternSize, "minSequentialPatternSize must not be null");
        Validate.notNull(maxSequentialPatternSize, "maxSequentialPatternSize must not be null");
        Validate.notNull(extractionStrategy, "extractionStrategy must not be null");
        Validate.inclusiveBetween(1, Integer.MAX_VALUE, minSequentialPatternSize);
        Validate.inclusiveBetween(minSequentialPatternSize, Integer.MAX_VALUE, maxSequentialPatternSize);

        Set<String> keywordSet = new HashSet<String>();
        Collections.addAll(keywordSet, keywords);
        this.keywords = keywordSet;
        this.minSequentialPatternSize = minSequentialPatternSize;
        this.maxSequentialPatternSize = maxSequentialPatternSize;
        this.extractionStrategy = extractionStrategy;
    }

    /**
     * Extracts the sequential patterns of every sentence in {@code textDocument} and adds them to
     * the document as one {@link ListFeature} named {@link #PROVIDED_FEATURE}.
     *
     * @param textDocument the document to process; it is modified by adding the new feature
     */
    @Override
    public void processDocument(TextDocument textDocument) {
        // The feature lookups are cast to a raw Collection because their element type is not
        // visible from this class; the elements are expected to be PositionAnnotations.
        List<PositionAnnotation> tokens = new ArrayList<PositionAnnotation>(
                (Collection) textDocument.get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE));
        List<PositionAnnotation> sentences = new ArrayList<PositionAnnotation>(
                (Collection) textDocument.get(ListFeature.class, AbstractSentenceDetector.PROVIDED_FEATURE));
        List<PositionAnnotation> markedKeywords = markKeywords(textDocument);

        // Work on sorted copies so that both cursors below only ever need to move forward.
        // NOTE(review): relies on the natural order of PositionAnnotation following text position.
        Collections.sort(tokens);
        Collections.sort(sentences);
        Collections.sort(markedKeywords);

        Iterator<PositionAnnotation> tokenIterator = tokens.iterator();
        Iterator<PositionAnnotation> keywordIterator = markedKeywords.iterator();
        PositionAnnotation keyword = keywordIterator.hasNext() ? keywordIterator.next() : null;
        PositionAnnotation token = tokenIterator.hasNext() ? tokenIterator.next() : null;

        ListFeature patterns = new ListFeature(PROVIDED_FEATURE);
        for (PositionAnnotation sentence : sentences) {
            int sentenceEnd = sentence.getEndPosition();
            int position = sentence.getStartPosition();
            List<String> labels = new ArrayList<String>();

            // Re-sync both cursors with the sentence start. Without this, an annotation lying in
            // the gap between two sentences would keep a cursor behind and make the scan skip the
            // first keyword or token of this sentence.
            keyword = skipAnnotationsBefore(position, keyword, keywordIterator);
            token = skipAnnotationsBefore(position, token, tokenIterator);

            while (position < sentenceEnd) {
                if (keyword != null && keyword.getStartPosition() == position) {
                    // A keyword wins over a token starting at the same position.
                    labels.add((String) keyword.getValue());
                    // Math.max guarantees progress even for a zero-length annotation.
                    position = Math.max(keyword.getEndPosition(), position + 1);
                } else if (token != null && token.getStartPosition() == position) {
                    NominalFeature tokenLabel = (NominalFeature) token.getFeatureVector()
                            .get(NominalFeature.class, BasePosTagger.PROVIDED_FEATURE);
                    labels.add((String) tokenLabel.getValue());
                    position = Math.max(token.getEndPosition(), position + 1);
                } else {
                    // Nothing starts here (e.g. whitespace): move on by one character.
                    position++;
                }
                keyword = skipAnnotationsBefore(position, keyword, keywordIterator);
                token = skipAnnotationsBefore(position, token, tokenIterator);
            }

            patterns.addAll(this.extractionStrategy.extract(labels.toArray(new String[labels.size()]),
                    this.minSequentialPatternSize, this.maxSequentialPatternSize));
        }
        textDocument.add(patterns);
    }

    /**
     * Moves a cursor forward past all annotations that start before {@code position}.
     *
     * @param position the current scan position
     * @param current the annotation the cursor currently points to, may be {@code null}
     * @param remaining the iterator supplying the annotations after {@code current}
     * @return the first annotation starting at or after {@code position}; if the iterator runs out
     *         first, the last annotation seen (which then never matches again); {@code null} if
     *         {@code current} was {@code null}
     */
    private static PositionAnnotation skipAnnotationsBefore(int position, PositionAnnotation current,
            Iterator<PositionAnnotation> remaining) {
        PositionAnnotation cursor = current;
        while (cursor != null && cursor.getStartPosition() < position && remaining.hasNext()) {
            cursor = remaining.next();
        }
        return cursor;
    }

    /**
     * Finds every occurrence of every keyword in the document content, ignoring case, and creates
     * a position annotation for each match.
     *
     * @param textDocument the document whose content is searched
     * @return a new, mutable, unsorted list with one annotation per non-empty match
     */
    private List<PositionAnnotation> markKeywords(TextDocument textDocument) {
        List<PositionAnnotation> marked = new ArrayList<PositionAnnotation>();
        // NOTE(review): toLowerCase() uses the default locale, and the match offsets of the
        // lower-cased copy are assumed to line up with the original content.
        String lowerCasedContent = textDocument.getContent().toLowerCase();
        PositionAnnotationFactory annotationFactory = new PositionAnnotationFactory(textDocument);
        for (String keyword : this.keywords) {
            // NOTE(review): the keyword is compiled as a regular expression without quoting, so
            // metacharacters in a keyword are interpreted; confirm whether that is intended.
            Matcher matcher = Pattern.compile(keyword.toLowerCase()).matcher(lowerCasedContent);
            // Mark all occurrences, not only the first one, so that keywords are recognised in
            // every sentence they appear in.
            while (matcher.find()) {
                // An empty match covers no text and cannot serve as a label.
                if (matcher.end() > matcher.start()) {
                    marked.add(annotationFactory.create(matcher.start(), matcher.end()));
                }
            }
        }
        return marked;
    }
}
