package ws.palladian.extraction.feature;

import java.util.HashSet;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang3.Validate;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.PositionAnnotationFactory;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/CharNGramCreator.class */
/**
 * Pipeline processor which annotates the character n-grams of a {@link TextDocument}.
 *
 * <p>For every start offset in the document content, and for every length between
 * {@code minLength} and {@code maxLength} (both inclusive) that still fits into the content,
 * an annotation is created through a {@link PositionAnnotationFactory} and collected in a
 * {@link ListFeature} named {@link BaseTokenizer#PROVIDED_FEATURE}. That feature is added to the
 * document once the scan is finished or the configured limit has been reached.
 */
public class CharNGramCreator extends TextDocumentPipelineProcessor {
    /** Smallest n-gram length to create (at least 1). */
    private final int minLength;
    /** Largest n-gram length to create (at least {@link #minLength}). */
    private final int maxLength;
    /** If {@code true}, only the first occurrence of each distinct n-gram string is annotated. */
    private final boolean unique;
    /** Maximum number of annotations to create per document (greater than zero). */
    private final int limit;

    /**
     * Creates a new n-gram creator.
     *
     * @param minLength smallest n-gram length, must be at least 1
     * @param maxLength largest n-gram length, must be at least {@code minLength}
     * @param unique {@code true} to annotate only the first occurrence of each distinct n-gram
     * @param limit maximum number of annotations per document, must be greater than zero
     * @throws IllegalArgumentException if one of the arguments violates its constraint
     */
    public CharNGramCreator(int minLength, int maxLength, boolean unique, int limit) {
        Validate.inclusiveBetween(1, Integer.MAX_VALUE, minLength);
        Validate.inclusiveBetween(minLength, Integer.MAX_VALUE, maxLength);
        Validate.isTrue(limit > 0, "limit must be greater than zero, but was %d", limit);
        this.minLength = minLength;
        this.maxLength = maxLength;
        this.unique = unique;
        this.limit = limit;
    }

    /**
     * Creates a new n-gram creator which annotates every occurrence (no de-duplication) and
     * uses {@link Integer#MAX_VALUE} as limit.
     *
     * @param minLength smallest n-gram length, must be at least 1
     * @param maxLength largest n-gram length, must be at least {@code minLength}
     * @throws IllegalArgumentException if one of the arguments violates its constraint
     */
    public CharNGramCreator(int minLength, int maxLength) {
        this(minLength, maxLength, false, Integer.MAX_VALUE);
    }

    /**
     * Annotates the character n-grams of the given document and adds them to it as one
     * {@link ListFeature}.
     *
     * @param textDocument the document whose content is scanned
     * @throws DocumentUnprocessableException declared by the overridden method; not thrown
     *         directly by this implementation
     */
    @Override
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        PositionAnnotationFactory annotationFactory = new PositionAnnotationFactory(textDocument);
        ListFeature nGrams = new ListFeature(BaseTokenizer.PROVIDED_FEATURE);
        HashSet<String> seen = CollectionHelper.newHashSet();
        // Fetch the content once; it does not change during the scan.
        String content = (String) textDocument.getContent();
        int length = content.length();
        scan:
        for (int start = 0; start < length; start++) {
            // Bound the n-gram length by the remaining characters. Comparing against
            // "length - start" instead of testing "start + n <= length" avoids an int
            // overflow for very large minLength values.
            int longest = Math.min(this.maxLength, length - start);
            for (int n = this.minLength; n <= longest; n++) {
                int end = start + n;
                // The substring is only needed for the uniqueness check, so it is not
                // materialized when duplicates are allowed.
                if (this.unique && !seen.add(content.substring(start, end))) {
                    continue;
                }
                nGrams.add(annotationFactory.create(start, end));
                if (nGrams.size() >= this.limit) {
                    break scan;
                }
            }
        }
        textDocument.add(nGrams);
    }

    /**
     * Returns a textual description of this processor including its configuration.
     *
     * @return string of the form
     *         {@code CharNGramCreator [minLength=.., maxLength=.., unique=.., limit=..]}
     */
    @Override
    public String toString() {
        return "CharNGramCreator [minLength=" + this.minLength + ", maxLength=" + this.maxLength
                + ", unique=" + this.unique + ", limit=" + this.limit + "]";
    }
}
