package ws.palladian.extraction.location;

import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.location.ContextClassifier;
import ws.palladian.extraction.location.disambiguation.HeuristicDisambiguation;
import ws.palladian.extraction.location.disambiguation.LocationDisambiguation;
import ws.palladian.extraction.location.persistence.LocationDatabase;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.DefaultMultiMap;
import ws.palladian.helper.collection.Filter;
import ws.palladian.helper.collection.MultiMap;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.persistence.DatabaseManagerFactory;
import ws.palladian.processing.features.Annotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/location/PalladianLocationExtractor.class */
/**
 * Location extractor which tags candidate entities in a text, looks them up in a {@link LocationSource} and
 * hands the candidates to a {@link LocationDisambiguation} strategy. Address annotations produced by an
 * {@link AddressTagger} are merged into the result.
 */
public class PalladianLocationExtractor extends LocationExtractor {
    /**
     * Public constant with value 3. It is not referenced inside this class as shown.
     * NOTE(review): the same value is passed to the {@link EntityPreprocessingTagger} below; presumably the
     * compiler inlined this constant there -- confirm against the original sources before coupling them.
     */
    public static final int LONG_ANNOTATION_SPLIT = 3;

    /** File read by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DEMO_FILE = "/Users/pk/Dropbox/Uni/Dissertation_LocationLab/LGL-converted/0-all/text_44026163.txt";

    /** One or two upper-case letters, or one upper-case letter followed by a dot; compiled once and reused. */
    private static final Pattern SHORT_ABBREVIATION = Pattern.compile("[A-Z]{1,2}|[A-Z]\\.");

    /**
     * Accepts "US", "UK" and every value which is not a short abbreviation as described by
     * {@link #SHORT_ABBREVIATION}. The filter is stateless, so a single shared instance is sufficient.
     */
    private static final Filter<Annotation> ABBREVIATION_FILTER = new Filter<Annotation>() {
        @Override
        public boolean accept(Annotation annotation) {
            String value = annotation.getValue();
            return value.equals("US") || value.equals("UK") || !SHORT_ABBREVIATION.matcher(value).matches();
        }
    };

    private final LocationSource locationSource;
    private final LocationDisambiguation disambiguation;
    private static final EntityPreprocessingTagger tagger = new EntityPreprocessingTagger(3);
    private static final AnnotationFilter filter = new AnnotationFilter();
    private static final AddressTagger addressTagger = new AddressTagger();
    private static final ContextClassifier contextClassifier = new ContextClassifier(ContextClassifier.ClassificationMode.PROPAGATION);

    /**
     * Creates an extractor with an explicit disambiguation strategy.
     *
     * @param locationSource The source used for looking up location candidates.
     * @param locationDisambiguation The strategy used for selecting among the candidates.
     */
    public PalladianLocationExtractor(LocationSource locationSource, LocationDisambiguation locationDisambiguation) {
        this.locationSource = locationSource;
        this.disambiguation = locationDisambiguation;
    }

    /**
     * Creates an extractor which uses a {@link HeuristicDisambiguation}.
     *
     * @param locationSource The source used for looking up location candidates.
     */
    public PalladianLocationExtractor(LocationSource locationSource) {
        this(locationSource, new HeuristicDisambiguation());
    }

    /**
     * Extracts location annotations from the given text. The text is tagged, the tags are filtered and
     * classified, location candidates are fetched from the location source and passed to the disambiguation;
     * finally the address tagger's annotations are added, the result is sorted and nested annotations are
     * removed.
     *
     * @param str The text to process.
     * @return The annotations after sorting and removal of nested annotations.
     */
    @Override // ws.palladian.extraction.location.LocationExtractor, ws.palladian.extraction.entity.NamedEntityRecognizer, ws.palladian.processing.Tagger
    public List<LocationAnnotation> getAnnotations(String str) {
        List<ContextClassifier.ClassifiedAnnotation> classify = contextClassifier.classify(filter.filter(tagger.getAnnotations(str)), str);
        // NOTE(review): presumably CollectionHelper.remove drops the items which the filter does NOT accept,
        // i.e. short abbreviations other than "US"/"UK" -- confirm against CollectionHelper.
        CollectionHelper.remove(classify, ABBREVIATION_FILTER);
        MultiMap<ContextClassifier.ClassifiedAnnotation, Location> fetchLocations = fetchLocations(this.locationSource, classify);
        // NOTE(review): raw type kept from the original; parameterize if Annotations turns out to be generic.
        Annotations annotations = new Annotations();
        annotations.addAll(this.disambiguation.disambiguate(str, fetchLocations));
        annotations.addAll(addressTagger.getAnnotations(str));
        annotations.sort();
        annotations.removeNested();
        return annotations;
    }

    /**
     * Looks up location candidates for the given annotations. All normalized, lower-cased annotation values are
     * collected first and requested from the source in a single call (for {@link Language#ENGLISH}); afterwards
     * each annotation is associated with the candidates found for its value.
     *
     * @param locationSource The source to query.
     * @param list The annotations for which to fetch candidates.
     * @return A multi map from each annotation to its candidates; annotations without candidates are added with
     *         an empty collection.
     */
    public static <A extends Annotation> MultiMap<A, Location> fetchLocations(LocationSource locationSource, List<A> list) {
        HashSet<String> names = CollectionHelper.newHashSet();
        for (A annotation : list) {
            names.add(lookupKey(annotation));
        }
        MultiMap<String, Location> locations = locationSource.getLocations(names, EnumSet.of(Language.ENGLISH));
        MultiMap<A, Location> result = DefaultMultiMap.createWithSet();
        for (A annotation : list) {
            Collection<Location> candidates = locations.get(lookupKey(annotation));
            // Guard against sources whose multi map yields null instead of an empty collection for unknown names.
            if (candidates != null && !candidates.isEmpty()) {
                result.addAll(annotation, candidates);
            } else {
                result.addAll(annotation, Collections.<Location> emptySet());
            }
        }
        return result;
    }

    /**
     * Builds the key under which an annotation is looked up: its normalized value in lower case. Lower-casing
     * uses {@link Locale#ROOT} so that the key does not depend on the JVM's default locale (e.g. the Turkish
     * dotless i).
     */
    private static String lookupKey(Annotation annotation) {
        return LocationExtractorUtils.normalizeName(annotation.getValue()).toLowerCase(Locale.ROOT);
    }

    /**
     * @return The extractor's name, consisting of "PalladianLocationExtractor:" and the string representation of
     *         the disambiguation strategy.
     */
    @Override // ws.palladian.extraction.entity.NamedEntityRecognizer
    public String getName() {
        return String.format("PalladianLocationExtractor:%s", this.disambiguation);
    }

    /**
     * Demo entry point: reads a file, strips HTML tags, extracts locations using the "locations" database
     * configuration and prints them.
     *
     * @param strArr Optional; the first element is the path of the file to process. Without arguments, the
     *            built-in default file is used.
     */
    public static void main(String[] strArr) {
        String path = strArr != null && strArr.length > 0 ? strArr[0] : DEFAULT_DEMO_FILE;
        LocationDatabase database = (LocationDatabase) DatabaseManagerFactory.create(LocationDatabase.class, "locations");
        PalladianLocationExtractor extractor = new PalladianLocationExtractor(database);
        String text = HtmlHelper.stripHtmlTags(FileHelper.readFileToString(path));
        CollectionHelper.print(extractor.getAnnotations(text));
    }
}
