package ws.palladian.extraction.location.disambiguation;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.extraction.location.ContextClassifier;
import ws.palladian.extraction.location.GeoCoordinate;
import ws.palladian.extraction.location.GeoUtils;
import ws.palladian.extraction.location.Location;
import ws.palladian.extraction.location.LocationAnnotation;
import ws.palladian.extraction.location.LocationExtractorUtils;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.MultiMap;
import ws.palladian.processing.features.Annotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/location/disambiguation/HeuristicDisambiguation.class */
/**
 * Rule-based {@link LocationDisambiguation} which resolves ambiguous location candidates by means of
 * so-called <i>anchor</i> locations.
 * <p>
 * The strategy, as implemented below, is:
 * <ol>
 * <li>Drop annotations which are unlikely to be locations at all (see
 * {@link #getUnlikelyLocations(MultiMap)}).</li>
 * <li>Determine a set of anchor locations from all candidates (see {@link #getAnchors(MultiMap)}).</li>
 * <li>For every remaining annotation keep those candidates which are anchors themselves, lie close to an
 * anchor, or are sufficiently populated descendants of an anchor, and pick one of them (see
 * {@link #selectLocation(Collection)}).</li>
 * </ol>
 * <p>
 * All distance thresholds are compared against the values returned by {@link GeoUtils#getDistance}; the unit
 * is therefore whatever that method returns (presumably kilometres -- TODO confirm against GeoUtils).
 */
public class HeuristicDisambiguation implements LocationDisambiguation {
    private static final Logger LOGGER = LoggerFactory.getLogger(HeuristicDisambiguation.class);

    /** Default maximum distance between a candidate and an anchor for the candidate to be accepted. */
    public static final int ANCHOR_DISTANCE_THRESHOLD = 100;

    /** Default minimum population a location needs to be accepted as anchor child or unambiguous anchor. */
    public static final int LOWER_POPULATION_THRESHOLD = 5000;

    /** Default minimum population above which a location is always treated as anchor. */
    public static final int ANCHOR_POPULATION_THRESHOLD = 1000000;

    /** Default maximum spread of a candidate group for the group to count as "the same place". */
    public static final int SAME_DISTANCE_THRESHOLD = 50;

    /** Default maximum distance to the midpoint used when shrinking the lasso. */
    public static final int LASSO_DISTANCE_THRESHOLD = 100;

    /** Default population above which an annotation is never discarded as unlikely. */
    public static final int LOWER_UNLIKELY_POPULATION_THRESHOLD = 100000;

    /** Default minimum number of whitespace-separated tokens for a name to qualify as anchor. */
    public static final int TOKEN_THRESHOLD = 2;

    private final int anchorDistanceThreshold;
    private final int lowerPopulationThreshold;
    private final int anchorPopulationThreshold;
    private final int sameDistanceThreshold;
    private final int lassoDistanceThreshold;
    private final int lowerUnlikelyPopulationThreshold;
    private final int tokenThreshold;

    /**
     * Creates an instance which uses the default thresholds declared as constants in this class.
     */
    public HeuristicDisambiguation() {
        this(ANCHOR_DISTANCE_THRESHOLD, LOWER_POPULATION_THRESHOLD, ANCHOR_POPULATION_THRESHOLD,
                SAME_DISTANCE_THRESHOLD, LASSO_DISTANCE_THRESHOLD, LOWER_UNLIKELY_POPULATION_THRESHOLD,
                TOKEN_THRESHOLD);
    }

    /**
     * Creates an instance with explicit thresholds. The values are stored as given; no validation is done.
     *
     * @param anchorDistanceThreshold Candidates closer than this to an anchor are accepted.
     * @param lowerPopulationThreshold Population a candidate must exceed to be accepted as descendant of an
     *            anchor, or to become an anchor when its candidate group is unambiguous.
     * @param anchorPopulationThreshold Locations with a population above this value are always anchors.
     * @param sameDistanceThreshold Candidate groups whose largest pairwise distance is below this value are
     *            considered unambiguous.
     * @param lassoDistanceThreshold The lasso stops shrinking once the farthest location is closer than this
     *            to the midpoint.
     * @param lowerUnlikelyPopulationThreshold Annotations having a candidate with a population above this
     *            value are never discarded as unlikely.
     * @param tokenThreshold Minimum number of whitespace-separated tokens in an annotation value for its
     *            unambiguous candidate to become an anchor regardless of population.
     */
    public HeuristicDisambiguation(int anchorDistanceThreshold, int lowerPopulationThreshold,
            int anchorPopulationThreshold, int sameDistanceThreshold, int lassoDistanceThreshold,
            int lowerUnlikelyPopulationThreshold, int tokenThreshold) {
        this.anchorDistanceThreshold = anchorDistanceThreshold;
        this.lowerPopulationThreshold = lowerPopulationThreshold;
        this.anchorPopulationThreshold = anchorPopulationThreshold;
        this.sameDistanceThreshold = sameDistanceThreshold;
        this.lassoDistanceThreshold = lassoDistanceThreshold;
        this.lowerUnlikelyPopulationThreshold = lowerUnlikelyPopulationThreshold;
        this.tokenThreshold = tokenThreshold;
    }

    /**
     * Selects at most one location for each annotation.
     * <p>
     * <b>Note:</b> the supplied map is modified: annotations which are considered unlikely locations are
     * removed from it before the disambiguation starts.
     *
     * @param text The text from which the annotations were extracted; not used by this implementation.
     * @param locations The annotations together with their location candidates.
     * @return The annotations for which a location could be selected; annotations without candidates or
     *         without any candidate related to an anchor are omitted.
     */
    @Override
    public List<LocationAnnotation> disambiguate(String text,
            MultiMap<ContextClassifier.ClassifiedAnnotation, Location> locations) {
        locations.keySet().removeAll(getUnlikelyLocations(locations));
        List<LocationAnnotation> result = CollectionHelper.newArrayList();
        Set<Location> anchors = getAnchors(locations);

        for (ContextClassifier.ClassifiedAnnotation annotation : locations.keySet()) {
            Collection<Location> candidates = locations.get(annotation);
            if (candidates.isEmpty()) {
                LOGGER.debug("'{}' could not be found and will be dropped", annotation.getValue());
                continue;
            }
            LOGGER.debug("'{}' has {} candidates", annotation.getValue(), candidates.size());

            // anchors which stem from this very annotation must not support its own candidates
            Set<Location> foreignAnchors = new HashSet<Location>(anchors);
            foreignAnchors.removeAll(candidates);

            Set<Location> preselection = CollectionHelper.newHashSet();
            for (Location candidate : candidates) {
                if (anchors.contains(candidate)) {
                    LOGGER.debug("{} is in anchors", candidate);
                    preselection.add(candidate);
                } else if (isSupportedByAnchor(candidate, foreignAnchors)) {
                    preselection.add(candidate);
                }
            }
            if (!preselection.isEmpty()) {
                result.add(new LocationAnnotation(annotation, selectLocation(preselection)));
            }
        }
        return result;
    }

    /**
     * Checks whether a candidate is backed by at least one of the given anchors, i.e. it either lies closer
     * than {@code anchorDistanceThreshold} to the anchor, or it is a descendant of a city, unit or country
     * anchor and its population exceeds {@code lowerPopulationThreshold}. Stops at the first matching anchor.
     */
    private boolean isSupportedByAnchor(Location candidate, Set<Location> anchors) {
        for (Location anchor : anchors) {
            double distance = GeoUtils.getDistance(candidate, anchor);
            if (distance < anchorDistanceThreshold) {
                LOGGER.debug("Distance of {} to anchors: {}", candidate, distance);
                return true;
            }
            LocationType anchorType = anchor.getType();
            boolean parentCapable = anchorType == LocationType.CITY || anchorType == LocationType.UNIT
                    || anchorType == LocationType.COUNTRY;
            if (parentCapable && candidate.descendantOf(anchor)
                    && populationOf(candidate) > lowerPopulationThreshold) {
                LOGGER.debug("{} is child of anchor '{}'", candidate, anchor.getPrimaryName());
                return true;
            }
        }
        return false;
    }

    /**
     * Determines annotations which most probably do not denote locations: none of their candidates is a
     * country or continent, the highest candidate population does not exceed
     * {@code lowerUnlikelyPopulationThreshold}, and the classifier assigned the category {@code "PER"} with a
     * probability of exactly 1.
     *
     * @param locations The annotations with their candidates; not modified here.
     * @return The annotations to discard, or an empty set.
     */
    private Set<Annotation> getUnlikelyLocations(MultiMap<ContextClassifier.ClassifiedAnnotation, Location> locations) {
        Set<Annotation> unlikely = CollectionHelper.newHashSet();
        for (ContextClassifier.ClassifiedAnnotation annotation : locations.keySet()) {
            Collection<Location> candidates = locations.get(annotation);
            boolean prominentType = LocationExtractorUtils.containsType(candidates, LocationType.COUNTRY,
                    LocationType.CONTINENT);
            boolean highPopulation = LocationExtractorUtils.getHighestPopulation(candidates) > lowerUnlikelyPopulationThreshold;
            if (prominentType || highPopulation) {
                continue;
            }
            if (annotation.getCategoryEntries().getProbability("PER") == 1.0d) {
                LOGGER.debug("{} does not seem to be a location and will be dropped", annotation);
                unlikely.add(annotation);
            }
        }
        LOGGER.debug("Spotted {} unlikely locations", unlikely.size());
        return unlikely;
    }

    /**
     * Picks one location from the given selection. If the selection contains exactly one continent, it is
     * returned. Otherwise the selection is sorted so that ancestors precede their descendants and, among
     * unrelated locations, higher populations come first (the population of cities counts twice); the first
     * element after sorting is returned.
     * <p>
     * NOTE(review): the ordering mixes a hierarchy relation with a population ranking and is therefore not
     * guaranteed to be transitive; the result for such inputs depends on the sort implementation.
     *
     * @param selection The preselected candidates; callers in this class only pass non-empty collections.
     * @return The selected location.
     */
    private static Location selectLocation(Collection<Location> selection) {
        Set<Location> continents = LocationExtractorUtils.filterConditionally(selection,
                new LocationExtractorUtils.LocationTypeFilter(LocationType.CONTINENT));
        if (continents.size() == 1) {
            return CollectionHelper.getFirst(continents);
        }
        List<Location> ranked = new ArrayList<Location>(selection);
        Collections.sort(ranked, new Comparator<Location>() {
            @Override
            public int compare(Location first, Location second) {
                // hierarchy has precedence: ancestors are ranked before their descendants
                if (second.descendantOf(first)) {
                    return 1;
                }
                if (first.descendantOf(second)) {
                    return -1;
                }
                long firstWeight = weightedPopulation(first);
                long secondWeight = weightedPopulation(second);
                // descending order by weighted population
                return Long.valueOf(secondWeight).compareTo(Long.valueOf(firstWeight));
            }
        });
        return CollectionHelper.getFirst(ranked);
    }

    /**
     * Returns the population of the given location, doubled for cities; a missing population counts as zero.
     */
    private static long weightedPopulation(Location location) {
        long population = populationOf(location);
        return location.getType() == LocationType.CITY ? population * 2 : population;
    }

    /**
     * Returns the population of the given location, treating a missing ({@code null}) population as zero.
     */
    private static long populationOf(Location location) {
        Long population = location.getPopulation();
        return population != null ? population.longValue() : 0L;
    }

    /**
     * Determines the anchor locations in four stages, where the later stages only apply when no anchor has
     * been found so far:
     * <ol>
     * <li>All continents, countries, and locations with a population above
     * {@code anchorPopulationThreshold}.</li>
     * <li>For each annotation whose candidates with coordinates spread less than
     * {@code sameDistanceThreshold}: the biggest of those candidates, provided its population exceeds
     * {@code lowerPopulationThreshold} or the annotation value consists of at least {@code tokenThreshold}
     * whitespace-separated tokens.</li>
     * <li>The locations found by {@link #getLassoLocations(MultiMap)}.</li>
     * <li>The biggest of all candidate locations.</li>
     * </ol>
     *
     * @param locations The annotations with their candidates; not modified here.
     * @return The anchors, or an empty set if there are no candidates at all.
     */
    private Set<Location> getAnchors(MultiMap<? extends Annotation, Location> locations) {
        Set<Location> anchors = CollectionHelper.newHashSet();

        // stage 1: prominent locations
        for (Location location : locations.allValues()) {
            LocationType type = location.getType();
            if (type == LocationType.CONTINENT || type == LocationType.COUNTRY
                    || populationOf(location) > anchorPopulationThreshold) {
                LOGGER.debug("Prominent anchor location: {}", location);
                anchors.add(location);
            }
        }

        // stage 2: unambiguous locations
        for (Annotation annotation : locations.keySet()) {
            Collection<Location> candidates = locations.get(annotation);
            if (candidates.isEmpty()) {
                continue;
            }
            String name = annotation.getValue();
            Set<Location> withCoordinates = LocationExtractorUtils.filterConditionally(candidates,
                    new LocationExtractorUtils.CoordinateFilter());
            if (LocationExtractorUtils.getLargestDistance(withCoordinates) < sameDistanceThreshold) {
                Location biggest = LocationExtractorUtils.getBiggest(withCoordinates);
                // getBiggest is treated as nullable further below, so guard it here as well
                if (biggest != null && (populationOf(biggest) > lowerPopulationThreshold
                        || name.split("\\s").length >= tokenThreshold)) {
                    anchors.add(biggest);
                }
            } else {
                LOGGER.debug("Ambiguous location: {} ({} candidates)", name, withCoordinates.size());
            }
        }

        // stage 3: lasso
        if (anchors.isEmpty()) {
            anchors.addAll(getLassoLocations(locations));
        }

        // stage 4: fall back to the biggest location
        if (anchors.isEmpty()) {
            Location biggest = LocationExtractorUtils.getBiggest(locations.allValues());
            if (biggest != null) {
                LOGGER.debug("No anchor found, took biggest location: {}", biggest);
                anchors.add(biggest);
            }
        }
        if (anchors.isEmpty()) {
            LOGGER.debug("No anchor found.");
        }
        return anchors;
    }

    /**
     * Tries to find a geographically dense group among all candidates ("lasso"): the location farthest from
     * the midpoint of the current group is removed repeatedly until the farthest one is closer than
     * {@code lassoDistanceThreshold} to the midpoint, or only one location is left. If at least two locations
     * remain and {@link LocationExtractorUtils#sameNames} does not hold for them, they are returned together
     * with all candidates which are ancestors of one of them.
     *
     * @param locations The annotations with their candidates; not modified here.
     * @return The lasso locations including their parents, or an empty set if none could be identified.
     */
    private Set<Location> getLassoLocations(MultiMap<? extends Annotation, Location> locations) {
        Set<Location> lasso = new HashSet<Location>(locations.allValues());
        while (lasso.size() > 1) {
            GeoCoordinate midpoint = GeoUtils.getMidpoint(lasso);
            // start below every valid distance; Double.MIN_VALUE would be the smallest *positive* value
            double maxDistance = -1;
            Location farthest = null;
            for (Location candidate : lasso) {
                double distance = GeoUtils.getDistance(candidate, midpoint);
                if (distance > maxDistance) {
                    maxDistance = distance;
                    farthest = candidate;
                }
            }
            // without a farthest element nothing could be removed and the loop would never terminate
            if (farthest == null || maxDistance < lassoDistanceThreshold) {
                break;
            }
            lasso.remove(farthest);
            LOGGER.debug("Removed {}, distance to center: {}, {} items left", farthest, maxDistance, lasso.size());
        }
        if (lasso.size() < 2 || LocationExtractorUtils.sameNames(lasso)) {
            LOGGER.debug("Could not identify lasso locations");
            return Collections.emptySet();
        }
        LOGGER.debug("Identified {} locations via lasso trick", lasso.size());

        // additionally take all candidates which are parents of the lasso locations
        Set<Location> parents = CollectionHelper.newHashSet();
        for (Location child : lasso) {
            for (Location other : locations.allValues()) {
                if (child.descendantOf(other) && parents.add(other)) {
                    LOGGER.debug("Added {} to lassos because it is parent of {}", other, child);
                }
            }
        }
        LOGGER.debug("Adding {} parents of lasso locations", parents.size());
        lasso.addAll(parents);
        return lasso;
    }

    /**
     * Returns the class name followed by all configured thresholds.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append("HeuristicDisambiguation [anchorDistanceThreshold=").append(anchorDistanceThreshold);
        builder.append(", lowerPopulationThreshold=").append(lowerPopulationThreshold);
        builder.append(", anchorPopulationThreshold=").append(anchorPopulationThreshold);
        builder.append(", sameDistanceThreshold=").append(sameDistanceThreshold);
        builder.append(", lassoDistanceThreshold=").append(lassoDistanceThreshold);
        builder.append(", lowerUnlikelyPopulationThreshold=").append(lowerUnlikelyPopulationThreshold);
        builder.append(", tokenThreshold=").append(tokenThreshold);
        // plain literal instead of an unrelated library constant which merely happens to equal "]"
        builder.append("]");
        return builder.toString();
    }
}
