package ws.palladian.extraction.location;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.processing.features.Annotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/location/AnnotationFilter.class */
/**
 * Removes annotations from a list based on a blacklist read line by line from an input stream.
 *
 * <p>Each non-empty line that does not start with {@code #} is one blacklist entry:
 * <ul>
 * <li>{@code *text*} &ndash; "part" pattern, matches annotation values containing {@code text},</li>
 * <li>{@code *text} &ndash; suffix pattern, matches annotation values ending with {@code text},</li>
 * <li>{@code text*} &ndash; prefix pattern, matches annotation values starting with {@code text},</li>
 * <li>{@code text} &ndash; word entry, removes annotations whose value equals {@code text}.</li>
 * </ul>
 */
public class AnnotationFilter {
    private static final Logger LOGGER = LoggerFactory.getLogger(AnnotationFilter.class);

    /** Wildcard character marking part/prefix/suffix entries in the blacklist file. */
    private static final String WILDCARD = "*";

    /** Entries without wildcard; compared against the complete annotation value. */
    private final Set<String> words;
    /** Entries of the form {@code text*}. */
    private final Set<String> prefixes;
    /** Entries of the form {@code *text}. */
    private final Set<String> suffixes;
    /** Entries of the form {@code *text*}. */
    private final Set<String> parts;

    /**
     * Creates a filter from the class path resource {@code /locationsBlacklist.txt}.
     */
    public AnnotationFilter() {
        this(AnnotationFilter.class.getResourceAsStream("/locationsBlacklist.txt"));
    }

    /**
     * Creates a filter from the blacklist entries supplied by the given stream.
     *
     * @param inputStream stream providing one blacklist entry per line.
     */
    public AnnotationFilter(InputStream inputStream) {
        this.words = CollectionHelper.newHashSet();
        this.prefixes = CollectionHelper.newHashSet();
        this.suffixes = CollectionHelper.newHashSet();
        this.parts = CollectionHelper.newHashSet();
        FileHelper.performActionOnEveryLine(inputStream, new LineAction() {
            @Override
            public void performAction(String str, int i) {
                addEntry(str);
            }
        });
        LOGGER.debug("Filter dictionary contains {} words, {} parts, {} prefixes, {} suffixes", words.size(),
                parts.size(), prefixes.size(), suffixes.size());
    }

    /**
     * Filters the given annotations in two passes. The first pass collects a fragment blacklist: the
     * whitespace-separated tokens of every annotation value that matches a part, prefix or suffix pattern. The
     * second pass drops every annotation whose value is in the word list, or for which
     * {@link StringHelper#containsWord} reports a hit against the fragment blacklist.
     *
     * @param list the annotations to filter; this list itself is not modified.
     * @return a new list with the remaining annotations, in their original order.
     */
    public List<Annotation> filter(List<Annotation> list) {
        Set<String> fragmentBlacklist = CollectionHelper.newHashSet();
        for (Annotation annotation : list) {
            String value = annotation.getValue();
            if (matchesFragmentPattern(value)) {
                fragmentBlacklist.addAll(getParts(value));
            }
        }
        LOGGER.debug("Fragment blacklist: {}", fragmentBlacklist);

        List<Annotation> kept = CollectionHelper.newArrayList();
        for (Annotation annotation : list) {
            String value = annotation.getValue();
            if (words.contains(value)) {
                LOGGER.debug("Remove by word list: {}", value);
            } else if (StringHelper.containsWord(fragmentBlacklist, value)) {
                LOGGER.debug("Remove by fragment: {}", value);
            } else {
                kept.add(annotation);
            }
        }
        LOGGER.debug("Filter removed {} annotations", list.size() - kept.size());
        return kept;
    }

    /**
     * Sorts a single blacklist line into the matching entry set. Empty lines and lines starting with {@code #} are
     * skipped. Entries that consist of wildcards only are ignored as well: a lone {@code *} has no text to extract,
     * and an empty pattern would match every annotation value.
     */
    private void addEntry(String line) {
        if (line.isEmpty() || line.startsWith("#")) {
            return;
        }
        boolean leadingWildcard = line.startsWith(WILDCARD);
        boolean trailingWildcard = line.endsWith(WILDCARD);
        int begin = leadingWildcard ? 1 : 0;
        int end = trailingWildcard ? line.length() - 1 : line.length();
        if (begin >= end) {
            LOGGER.warn("Ignoring blacklist entry without text: {}", line);
            return;
        }
        String text = line.substring(begin, end);
        if (leadingWildcard && trailingWildcard) {
            parts.add(text);
        } else if (leadingWildcard) {
            suffixes.add(text);
        } else if (trailingWildcard) {
            prefixes.add(text);
        } else {
            words.add(text);
        }
    }

    /**
     * Checks whether the value contains one of the part patterns, starts with one of the prefix patterns, or ends
     * with one of the suffix patterns. Returns on the first hit.
     */
    private boolean matchesFragmentPattern(String value) {
        for (String part : parts) {
            if (value.contains(part)) {
                return true;
            }
        }
        for (String prefix : prefixes) {
            if (value.startsWith(prefix)) {
                return true;
            }
        }
        for (String suffix : suffixes) {
            if (value.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    }

    /** Splits the string at every single whitespace character and returns the distinct tokens. */
    private Set<String> getParts(String str) {
        return new HashSet<String>(Arrays.asList(str.split("\\s")));
    }
}
