package ws.palladian.extraction.location.evaluation;

import edu.stanford.nlp.classify.LinearClassifier;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.Validate;
import org.jdesktop.swingx.JXLabel;
import ws.palladian.classification.dt.QuickDtModel;
import ws.palladian.classification.utils.ClassificationUtils;
import ws.palladian.extraction.entity.NamedEntityRecognizer;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.extraction.location.LocationAnnotation;
import ws.palladian.extraction.location.LocationExtractor;
import ws.palladian.extraction.location.LocationExtractorUtils;
import ws.palladian.extraction.location.PalladianLocationExtractor;
import ws.palladian.extraction.location.disambiguation.CombinedDisambiguation;
import ws.palladian.extraction.location.disambiguation.FeatureBasedDisambiguation;
import ws.palladian.extraction.location.disambiguation.HeuristicDisambiguation;
import ws.palladian.extraction.location.persistence.LocationDatabase;
import ws.palladian.helper.ProgressMonitor;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.persistence.DatabaseManagerFactory;
import ws.palladian.processing.features.Annotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/location/evaluation/LocationExtractionEvaluator.class */
/**
 * Runs {@link LocationExtractor}s against gold-standard datasets and reports the results.
 *
 * <p>Datasets and extractors are registered via {@link #addDataset(String)}, {@link #addExtractor(LocationExtractor)}
 * and {@link #addExtractors(Collection)}; {@link #runAll()} then evaluates every extractor on every dataset. A single
 * combination can also be evaluated directly through the static {@link #run(LocationExtractor, File)}.</p>
 *
 * <p>Each evaluation prints a textual summary to standard output and writes CSV reports below {@code data/temp/}.</p>
 */
public final class LocationExtractionEvaluator {

    /** Directory prefix below which all report files are written. */
    private static final String OUTPUT_DIRECTORY = "data/temp/";

    /** Result types collected per document, in the order in which they are written to the detail report. */
    private static final List<EvaluationResult.ResultType> RESULT_TYPES = Arrays.asList(
            EvaluationResult.ResultType.CORRECT,
            EvaluationResult.ResultType.ERROR1,
            EvaluationResult.ResultType.ERROR2,
            EvaluationResult.ResultType.ERROR3,
            EvaluationResult.ResultType.ERROR4,
            EvaluationResult.ResultType.ERROR5);

    /** Dataset directories to evaluate on. */
    private final List<File> datasetPaths = CollectionHelper.newArrayList();

    /** Extractors to evaluate. */
    private final List<LocationExtractor> extractors = CollectionHelper.newArrayList();

    /**
     * Registers a dataset directory for evaluation.
     *
     * @param str Path to the dataset directory.
     * @throws IllegalArgumentException If the path does not denote an existing directory.
     */
    public void addDataset(String str) {
        File datasetDirectory = new File(str);
        if (!datasetDirectory.isDirectory()) {
            throw new IllegalArgumentException(str + " is not a directory.");
        }
        this.datasetPaths.add(datasetDirectory);
    }

    /**
     * Registers a single extractor for evaluation.
     *
     * @param locationExtractor The extractor to add.
     */
    public void addExtractor(LocationExtractor locationExtractor) {
        this.extractors.add(locationExtractor);
    }

    /**
     * Registers several extractors for evaluation.
     *
     * @param collection The extractors to add.
     */
    public void addExtractors(Collection<? extends LocationExtractor> collection) {
        this.extractors.addAll(collection);
    }

    /**
     * Evaluates every registered extractor on every registered dataset (datasets in the outer loop), reporting
     * progress after each combination.
     */
    public void runAll() {
        int totalRuns = this.datasetPaths.size() * this.extractors.size();
        // NOTE(review): JXLabel.NORMAL looks like a decompiler constant substitution for a plain numeric literal;
        // kept unchanged so the value passed to ProgressMonitor stays the same -- confirm against original sources.
        ProgressMonitor progressMonitor = new ProgressMonitor(totalRuns, JXLabel.NORMAL, "LocationExtractionEvaluation");
        for (File datasetDirectory : this.datasetPaths) {
            for (LocationExtractor extractor : this.extractors) {
                run(extractor, datasetDirectory);
                progressMonitor.incrementAndPrintProgress();
            }
        }
    }

    /**
     * Evaluates one extractor on one dataset.
     *
     * <p>Side effects: prints the summary to standard output, writes
     * {@code data/temp/<timestamp>_allErrors.csv} and {@code data/temp/<timestamp>_distances.csv}, and appends one
     * line to {@code data/temp/_locationsSummary.csv} (preceded by a header line if that file does not exist yet).</p>
     *
     * <p>Macro averages divide the per-document sums by the number of documents; per-document values which are
     * {@code NaN} contribute nothing to the sum, but the document is still counted. For an empty dataset the averages
     * are {@code NaN}.</p>
     *
     * @param locationExtractor The extractor to evaluate, not <code>null</code>.
     * @param file The dataset directory holding the gold standard, not <code>null</code>.
     * @throws NullPointerException If one of the arguments is <code>null</code>.
     * @throws IllegalArgumentException If {@code file} is not an existing directory.
     */
    public static void run(LocationExtractor locationExtractor, File file) {
        Validate.notNull(locationExtractor, "extractor must not be null");
        Validate.notNull(file, "datasetDirectory must not be null");
        if (!file.isDirectory()) {
            throw new IllegalArgumentException("The provided path to the gold standard '" + file + "' does not exist or is no directory.");
        }

        // result type -> (document file name -> annotations of that type); insertion-ordered for stable report output
        Map<EvaluationResult.ResultType, Map<String, Collection<? extends Annotation>>> annotationsByType =
                new LinkedHashMap<EvaluationResult.ResultType, Map<String, Collection<? extends Annotation>>>();
        for (EvaluationResult.ResultType resultType : RESULT_TYPES) {
            annotationsByType.put(resultType, new HashMap<String, Collection<? extends Annotation>>());
        }

        Iterator<LocationExtractorUtils.LocationDocument> documents = LocationExtractorUtils.iterateDataset(file);
        double precisionMucSum = 0.0d;
        double precisionExactSum = 0.0d;
        double recallMucSum = 0.0d;
        double recallExactSum = 0.0d;
        EvaluationResult micro = new EvaluationResult(Collections.emptyList());
        GeoEvaluationResult geoResult = new GeoEvaluationResult(locationExtractor.getName(), file.getPath());
        StopWatch stopWatch = new StopWatch();
        int documentCount = 0;

        while (documents.hasNext()) {
            LocationExtractorUtils.LocationDocument document = documents.next();
            List<LocationAnnotation> extracted = locationExtractor.getAnnotations(document.getText());
            EvaluationResult result = NamedEntityRecognizer.evaluate(document.getAnnotations(), extracted, Collections.<String>emptySet());
            for (EvaluationResult.ResultType resultType : RESULT_TYPES) {
                annotationsByType.get(resultType).put(document.getFileName(), result.getAnnotations(resultType));
            }
            precisionMucSum += zeroIfNaN(result.getPrecision(EvaluationResult.EvaluationMode.MUC));
            precisionExactSum += zeroIfNaN(result.getPrecision(EvaluationResult.EvaluationMode.EXACT_MATCH));
            recallMucSum += zeroIfNaN(result.getRecall(EvaluationResult.EvaluationMode.MUC));
            recallExactSum += zeroIfNaN(result.getRecall(EvaluationResult.EvaluationMode.EXACT_MATCH));
            micro.merge(result);
            documentCount++;
            geoResult.addResultFromDocument(document, extracted);
        }

        double macroPrecisionExact = precisionExactSum / documentCount;
        double macroRecallExact = recallExactSum / documentCount;
        double macroPrecisionMuc = precisionMucSum / documentCount;
        double macroRecallMuc = recallMucSum / documentCount;

        StringBuilder summary = new StringBuilder();
        summary.append("Result for:").append(locationExtractor.getName()).append("\n\n");
        summary.append("Using dataset:").append(file.getPath()).append("\n\n");
        summary.append("============ macro average ============\n\n");
        summary.append("Precision-Exact:").append(macroPrecisionExact).append('\n');
        summary.append("Recall-Exact:").append(macroRecallExact).append('\n');
        summary.append("F1-Exact:").append(harmonicMean(macroPrecisionExact, macroRecallExact)).append('\n');
        summary.append('\n');
        summary.append("Precision-MUC:").append(macroPrecisionMuc).append('\n');
        summary.append("Recall-MUC:").append(macroRecallMuc).append('\n');
        summary.append("F1-MUC:").append(harmonicMean(macroPrecisionMuc, macroRecallMuc)).append("\n\n");
        summary.append("============ micro average ============\n\n");
        summary.append("Precision-Exact:").append(micro.getPrecision(EvaluationResult.EvaluationMode.EXACT_MATCH)).append('\n');
        summary.append("Recall-Exact:").append(micro.getRecall(EvaluationResult.EvaluationMode.EXACT_MATCH)).append('\n');
        summary.append("F1-Exact:").append(micro.getF1(EvaluationResult.EvaluationMode.EXACT_MATCH)).append('\n');
        summary.append('\n');
        summary.append("Precision-MUC:").append(micro.getPrecision(EvaluationResult.EvaluationMode.MUC)).append('\n');
        summary.append("Recall-MUC:").append(micro.getRecall(EvaluationResult.EvaluationMode.MUC)).append('\n');
        summary.append("F1-MUC:").append(micro.getF1(EvaluationResult.EvaluationMode.MUC)).append("\n\n");
        summary.append("============ recognition only ============\n\n");

        int correctCount = micro.getAnnotations(EvaluationResult.ResultType.CORRECT).size();
        int error1Count = micro.getAnnotations(EvaluationResult.ResultType.ERROR1).size();
        int error2Count = micro.getAnnotations(EvaluationResult.ResultType.ERROR2).size();
        int error3Count = micro.getAnnotations(EvaluationResult.ResultType.ERROR3).size();
        int error4Count = micro.getAnnotations(EvaluationResult.ResultType.ERROR4).size();
        int error5Count = micro.getAnnotations(EvaluationResult.ResultType.ERROR5).size();
        int recognizedCount = correctCount + error3Count;
        int precisionDenominator = recognizedCount + error1Count + error4Count + error5Count;
        int recallDenominator = recognizedCount + error2Count;
        // Divide in floating point: an int/int division would truncate the ratio to 0 or 1 and throw an
        // ArithmeticException for a zero denominator; as doubles, an empty result yields NaN instead.
        double recognitionPrecision = (double) recognizedCount / precisionDenominator;
        double recognitionRecall = (double) recognizedCount / recallDenominator;
        double recognitionF1 = harmonicMean(recognitionPrecision, recognitionRecall);
        summary.append("Precision:").append(recognitionPrecision).append('\n');
        summary.append("Recall:").append(recognitionRecall).append('\n');
        summary.append("F1:").append(recognitionF1).append("\n\n");
        summary.append("Elapsed time:").append(stopWatch.getTotalElapsedTimeString()).append('\n');

        // Detail report: the summary with ':' turned into the CSV separator, followed by all annotations per type.
        StringBuilder details = new StringBuilder();
        details.append(summary.toString().replace(':', ';'));
        details.append("\n\n\n");
        for (Map.Entry<EvaluationResult.ResultType, Map<String, Collection<? extends Annotation>>> typeEntry : annotationsByType.entrySet()) {
            Map<String, Collection<? extends Annotation>> annotationsByFile = typeEntry.getValue();
            int typeTotal = 0;
            for (Collection<? extends Annotation> fileAnnotations : annotationsByFile.values()) {
                typeTotal += fileAnnotations.size();
            }
            details.append(typeEntry.getKey().getDescription()).append(ClassificationUtils.DEFAULT_SEPARATOR).append(typeTotal).append(FileHelper.NEWLINE_CHARACTER);
            for (Map.Entry<String, Collection<? extends Annotation>> fileEntry : annotationsByFile.entrySet()) {
                for (Annotation annotation : fileEntry.getValue()) {
                    details.append(LinearClassifier.TEXT_SERIALIZATION_DELIMITER).append(annotation).append(ClassificationUtils.DEFAULT_SEPARATOR).append(fileEntry.getKey()).append(FileHelper.NEWLINE_CHARACTER);
                }
            }
            details.append("\n\n");
        }
        long timestamp = System.currentTimeMillis();
        FileHelper.writeToFile(OUTPUT_DIRECTORY + timestamp + "_allErrors.csv", details);

        // Summary file: one line per run, accumulated over runs; the header is only written for a fresh file.
        StringBuilder summaryRow = new StringBuilder();
        File summaryFile = new File(OUTPUT_DIRECTORY + "_locationsSummary.csv");
        if (!summaryFile.exists()) {
            summaryRow.append("timestamp;dataset;extractor;prExact;rcExact;f1Exact;prMUC;rcMUC;f1MUC;prRec;rcRec;f1Rec;prGeo;rcGeo;f1Geo\n");
        }
        summaryRow.append(timestamp).append(';');
        summaryRow.append(file.getPath()).append(';');
        summaryRow.append(locationExtractor.getName()).append(';');
        summaryRow.append(micro.getPrecision(EvaluationResult.EvaluationMode.EXACT_MATCH)).append(';');
        summaryRow.append(micro.getRecall(EvaluationResult.EvaluationMode.EXACT_MATCH)).append(';');
        summaryRow.append(micro.getF1(EvaluationResult.EvaluationMode.EXACT_MATCH)).append(';');
        summaryRow.append(micro.getPrecision(EvaluationResult.EvaluationMode.MUC)).append(';');
        summaryRow.append(micro.getRecall(EvaluationResult.EvaluationMode.MUC)).append(';');
        summaryRow.append(micro.getF1(EvaluationResult.EvaluationMode.MUC)).append(';');
        summaryRow.append(recognitionPrecision).append(';');
        summaryRow.append(recognitionRecall).append(';');
        summaryRow.append(recognitionF1).append(';');
        summaryRow.append(geoResult.getPrecision()).append(';');
        summaryRow.append(geoResult.getRecall()).append(';');
        summaryRow.append(geoResult.getF1()).append(';').append('\n');
        FileHelper.appendFile(summaryFile.getPath(), summaryRow);

        System.out.println(summary);
        System.out.println("======= geo =========");
        System.out.println(geoResult.getSummary());
        geoResult.writeDetailedReport(new File(OUTPUT_DIRECTORY + timestamp + "_distances.csv"));
    }

    /**
     * Maps {@code NaN} to zero, so that such values do not poison a running sum.
     *
     * @param value The value to check.
     * @return {@code 0} if the value is {@code NaN}, else the unchanged value.
     */
    private static double zeroIfNaN(double value) {
        return Double.isNaN(value) ? 0.0d : value;
    }

    /**
     * Computes the F1 value, i.e. {@code 2 * precision * recall / (precision + recall)}.
     *
     * @param precision The precision.
     * @param recall The recall.
     * @return The F1 value; {@code NaN} if both arguments are zero or one of them is {@code NaN}.
     */
    private static double harmonicMean(double precision, double recall) {
        return ((2.0d * precision) * recall) / (precision + recall);
    }

    /**
     * Creates extractors with {@link HeuristicDisambiguation}, varying one constructor argument at a time while the
     * remaining six stay at the fixed values used below.
     *
     * @param locationDatabase The location source handed to every extractor.
     * @return The extractors, grouped by the varied argument in constructor-argument order.
     */
    private static List<LocationExtractor> createForParameterOptimization(LocationDatabase locationDatabase) {
        List<LocationExtractor> result = CollectionHelper.newArrayList();
        int[] firstArgumentValues = {0, 10, 100, 1000, 10000, 100000, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD};
        for (int value : firstArgumentValues) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(value, 5000, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD, 50, 100, 100000, 2)));
        }
        for (int value = 0; value <= 20000; value += 1000) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(100, value, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD, 50, 100, 100000, 2)));
        }
        for (int exponent = 0; exponent <= 9; exponent++) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(100, 5000, (int) Math.pow(10.0d, exponent), 50, 100, 100000, 2)));
        }
        for (int value = 0; value <= 200; value += 10) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(100, 5000, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD, value, 100, 100000, 2)));
        }
        for (int value = 0; value <= 200; value += 10) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(100, 5000, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD, 50, value, 100000, 2)));
        }
        for (int exponent = 0; exponent <= 9; exponent++) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(100, 5000, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD, 50, 100, (int) Math.pow(10.0d, exponent), 2)));
        }
        for (int value = 0; value <= 10; value++) {
            result.add(new PalladianLocationExtractor(locationDatabase, new HeuristicDisambiguation(100, 5000, HeuristicDisambiguation.ANCHOR_POPULATION_THRESHOLD, 50, 100, 100000, value)));
        }
        return result;
    }

    /**
     * Creates extractors with {@link FeatureBasedDisambiguation}, passing 0.0, 0.1, ..., 1.0 as second constructor
     * argument.
     *
     * @param locationDatabase The location source handed to every extractor.
     * @param quickDtModel The model handed to every disambiguation.
     * @return Eleven extractors, ordered by ascending second argument.
     */
    private static List<LocationExtractor> createForThresholdAnalysis(LocationDatabase locationDatabase, QuickDtModel quickDtModel) {
        List<LocationExtractor> result = CollectionHelper.newArrayList();
        for (int step = 0; step <= 10; step++) {
            result.add(new PalladianLocationExtractor(locationDatabase, new FeatureBasedDisambiguation(quickDtModel, step / 10.0d)));
        }
        return result;
    }

    /**
     * Evaluates a {@link PalladianLocationExtractor} with {@link CombinedDisambiguation} on a hard-coded dataset,
     * using a hard-coded serialized model and the database configuration named "locations".
     *
     * @param strArr Command line arguments; ignored.
     */
    public static void main(String[] strArr) {
        LocationExtractionEvaluator evaluator = new LocationExtractionEvaluator();
        evaluator.addDataset("/Users/pk/Dropbox/Uni/Datasets/TUD-Loc-2013/TUD-Loc-2013_V2/2-validation");
        LocationDatabase database = (LocationDatabase) DatabaseManagerFactory.create(LocationDatabase.class, "locations");
        QuickDtModel model = (QuickDtModel) FileHelper.deserialize("data/temp/location_disambiguation_1377440795471.model");
        evaluator.addExtractor(new PalladianLocationExtractor(database, new CombinedDisambiguation(model)));
        evaluator.runAll();
    }
}
