package ws.palladian.extraction.token;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.Validate;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.FeatureProvider;
import ws.palladian.processing.features.ImmutableAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/token/RegExTokenizer.class */
/**
 * Tokenizer that produces one {@link Annotation} for every match of a regular expression in the
 * input text.
 *
 * <p>The class holds only two final fields (the compiled pattern and the feature name) and creates
 * a fresh {@link Matcher} on every call to {@link #getAnnotations(String)}.
 */
public final class RegExTokenizer extends BaseTokenizer implements FeatureProvider {
    /** Compiled expression whose matches become the tokens. */
    private final Pattern pattern;

    /** Name reported through {@link #getCreatedFeatureName()}. */
    private final String featureName;

    /**
     * Creates a tokenizer configured with {@code BaseTokenizer.PROVIDED_FEATURE} as feature name
     * and {@code Tokenizer.SPLIT_PATTERN} as expression.
     */
    public RegExTokenizer() {
        this(BaseTokenizer.PROVIDED_FEATURE, Tokenizer.SPLIT_PATTERN);
    }

    /**
     * Creates a tokenizer from an already compiled pattern.
     *
     * @param str the feature name returned by {@link #getCreatedFeatureName()}; must not be
     *        {@code null}
     * @param pattern the expression whose matches are emitted as tokens; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     */
    public RegExTokenizer(String str, Pattern pattern) {
        Validate.notNull(str, "featureName must not be null");
        Validate.notNull(pattern, "pattern must not be null");
        this.pattern = pattern;
        this.featureName = str;
    }

    /**
     * Creates a tokenizer from a regular expression given as string; the expression is compiled
     * once, here, with {@link Pattern#compile(String)}.
     *
     * @param str the feature name returned by {@link #getCreatedFeatureName()}; must not be
     *        {@code null}
     * @param str2 the regular expression source; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     * @throws java.util.regex.PatternSyntaxException if {@code str2} is not a valid expression
     */
    public RegExTokenizer(String str, String str2) {
        this(str, Pattern.compile(str2));
    }

    /**
     * Scans the given text from left to right and returns one annotation per pattern match, in
     * the order the matches are found. Each annotation is built from the match start offset and
     * the matched text.
     *
     * <p>NOTE(review): every annotation is tagged with {@code BaseTokenizer.PROVIDED_FEATURE},
     * not with the feature name passed to the constructor. This is kept as-is because callers may
     * depend on the tag; confirm whether the configured feature name was intended here.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return a new, mutable list of annotations; empty if the pattern does not match
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override // ws.palladian.processing.Tagger
    public List<Annotation> getAnnotations(String str) {
        // Fail with a descriptive message instead of an anonymous NPE from Pattern.matcher.
        Validate.notNull(str, "text must not be null");
        Matcher matcher = this.pattern.matcher(str);
        List<Annotation> annotations = CollectionHelper.newArrayList();
        while (matcher.find()) {
            annotations.add(
                    new ImmutableAnnotation(matcher.start(), matcher.group(), BaseTokenizer.PROVIDED_FEATURE));
        }
        return annotations;
    }

    /**
     * Returns the feature name supplied at construction time.
     *
     * @return the configured feature name, never {@code null}
     */
    @Override // ws.palladian.processing.features.FeatureProvider
    public String getCreatedFeatureName() {
        return this.featureName;
    }
}
