package ws.palladian.extraction.feature;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.NominalFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/NGramCreator.class */
/**
 * Pipeline processor that combines the token annotations of a document into n-gram annotations.
 *
 * <p>For every length between {@code minLength} and {@code maxLength} (both inclusive) a window is
 * slid over the token annotations. A window only yields an n-gram when each token in it starts
 * exactly one position after the end position of the token before it. The value of an n-gram is
 * the values of its tokens joined by single spaces. For each configured feature descriptor the
 * n-gram additionally receives a {@link NominalFeature} whose value is the concatenation (without
 * separator) of the respective feature values of its tokens.</p>
 */
public class NGramCreator extends TextDocumentPipelineProcessor {
    /** Smallest n-gram length to create (at least 1). */
    private final int minLength;
    /** Largest n-gram length to create (at least {@link #minLength}). */
    private final int maxLength;
    /** Names of the nominal token features that are carried over to the created n-grams. */
    private final String[] considerableFeatureDescriptors;

    /**
     * Creates a processor that builds 2-grams only.
     *
     * @param considerableFeatureDescriptors names of the nominal token features to carry over to
     *        the n-grams, must not be {@code null}
     */
    public NGramCreator(String... considerableFeatureDescriptors) {
        this(2, 2, considerableFeatureDescriptors);
    }

    /**
     * Creates a processor that builds n-grams with lengths from 2 up to {@code maxLength}.
     *
     * @param maxLength largest n-gram length, must be at least 2
     * @param considerableFeatureDescriptors names of the nominal token features to carry over to
     *        the n-grams, must not be {@code null}
     */
    public NGramCreator(int maxLength, String... considerableFeatureDescriptors) {
        this(2, maxLength, considerableFeatureDescriptors);
    }

    /**
     * Creates a processor that builds n-grams with lengths from {@code minLength} up to
     * {@code maxLength}.
     *
     * @param minLength smallest n-gram length, must be at least 1
     * @param maxLength largest n-gram length, must not be smaller than {@code minLength}
     * @param considerableFeatureDescriptors names of the nominal token features to carry over to
     *        the n-grams, must not be {@code null}
     */
    public NGramCreator(int minLength, int maxLength, String... considerableFeatureDescriptors) {
        Validate.notNull(considerableFeatureDescriptors, "considerableFeatureDescriptors must not be null");
        Validate.inclusiveBetween(1, Integer.MAX_VALUE, Integer.valueOf(minLength));
        Validate.inclusiveBetween(Integer.valueOf(minLength), Integer.MAX_VALUE, Integer.valueOf(maxLength));
        this.minLength = minLength;
        this.maxLength = maxLength;
        // Defensive copy: the varargs array may be owned (and later modified) by the caller.
        this.considerableFeatureDescriptors = considerableFeatureDescriptors.clone();
    }

    /**
     * Creates the n-grams for all configured lengths and appends them to the list obtained from
     * {@link BaseTokenizer#getTokenAnnotations(TextDocument)}.
     *
     * @param document the document whose token annotations are processed
     * @throws DocumentUnprocessableException declared by the overridden method
     */
    @Override // ws.palladian.extraction.feature.TextDocumentPipelineProcessor
    public void processDocument(TextDocument document) throws DocumentUnprocessableException {
        List<PositionAnnotation> tokens = BaseTokenizer.getTokenAnnotations(document);
        // Collect everything first and append once at the end, so that n-grams of one length never
        // become input tokens for the next length.
        List<PositionAnnotation> nGrams = CollectionHelper.newArrayList();
        for (int length = this.minLength; length <= this.maxLength; length++) {
            nGrams.addAll(createNGrams(document, tokens, length));
        }
        tokens.addAll(nGrams);
    }

    /**
     * Creates all n-grams of the given length from the given token annotations.
     *
     * @param document the processed document (currently not used by this method)
     * @param tokens the token annotations, in the order in which they are combined
     * @param length number of tokens per n-gram
     * @return the created n-grams; empty when there are fewer than {@code length} tokens
     */
    private List<PositionAnnotation> createNGrams(TextDocument document, List<PositionAnnotation> tokens, int length) {
        List<PositionAnnotation> nGrams = CollectionHelper.newArrayList();
        PositionAnnotation[] tokenArray = tokens.toArray(new PositionAnnotation[tokens.size()]);
        for (int start = 0; start <= tokenArray.length - length; start++) {
            List<PositionAnnotation> window = CollectionHelper.newArrayList();
            boolean contiguous = true;
            int previousEnd = tokenArray[start].getEndPosition();
            for (int index = start; index < start + length; index++) {
                PositionAnnotation token = tokenArray[index];
                // Every token after the first has to start exactly one position behind the end
                // position of its predecessor; otherwise this window yields no n-gram.
                if (index > start && previousEnd + 1 != token.getStartPosition()) {
                    contiguous = false;
                    break;
                }
                window.add(token);
                previousEnd = token.getEndPosition();
            }
            if (!contiguous) {
                continue;
            }
            // Emit the completed window exactly once, then advance to the next start index.
            PositionAnnotation nGram = postProcess(window);
            if (nGram != null) {
                nGrams.add(nGram);
            }
        }
        return nGrams;
    }

    /**
     * Merges the tokens of one window into a single n-gram annotation.
     *
     * @param gramTokens the tokens forming the n-gram, in order
     * @return the n-gram annotation, positioned at the start of the first token, or {@code null}
     *         when the joined and trimmed token values are empty
     * @throws IllegalStateException if no start position could be determined, i.e. the list is
     *         empty or the first token reports a start position of -1
     */
    protected PositionAnnotation postProcess(List<PositionAnnotation> gramTokens) {
        int startPosition = gramTokens.isEmpty() ? -1 : gramTokens.get(0).getStartPosition();
        StringBuilder text = new StringBuilder();
        for (PositionAnnotation token : gramTokens) {
            text.append(token.getValue()).append(' ');
        }
        if (startPosition == -1) {
            throw new IllegalStateException("Yo, something is fucked up.");
        }
        // trim() removes the separator that was appended after the last token.
        String value = text.toString().trim();
        if (value.isEmpty()) {
            return null;
        }
        PositionAnnotation nGram = new PositionAnnotation(value, startPosition);
        for (String descriptor : this.considerableFeatureDescriptors) {
            List<String> featureValues = CollectionHelper.newArrayList();
            for (PositionAnnotation token : gramTokens) {
                // NOTE(review): presumably get(...) returns null when a token lacks this feature,
                // in which case this line throws a NullPointerException - confirm against
                // FeatureVector.
                featureValues.add(((NominalFeature) token.getFeatureVector().get(NominalFeature.class, descriptor)).getValue());
            }
            nGram.getFeatureVector().add(new NominalFeature(descriptor, StringUtils.join(featureValues, "")));
        }
        return nGram;
    }

    /**
     * @return a description of this processor containing the configured minimum and maximum
     *         n-gram length
     */
    @Override // ws.palladian.processing.AbstractPipelineProcessor
    public String toString() {
        return "NGramCreator [minLength=" + this.minLength + ", maxLength=" + this.maxLength + "]";
    }
}
