package ws.palladian.extraction.location;

import edu.stanford.nlp.classify.LinearClassifier;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import ws.palladian.classification.CategoryEntries;
import ws.palladian.classification.CategoryEntriesMap;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.Factory;
import ws.palladian.helper.collection.LazyMap;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.ImmutableAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/location/ContextClassifier.class */
/**
 * Rule-based classifier which assigns categories to annotations by looking at the words directly to the left or right
 * of an annotation in the surrounding text. The rules are read once from the class path resource
 * {@code /perLocContexts.csv}; each rule maps a context pattern to a category name.
 */
public class ContextClassifier {
    private static final Logger LOGGER = LoggerFactory.getLogger(ContextClassifier.class);

    /**
     * Separator between pattern and category in a rule line.
     * NOTE(review): this refers to a constant of an unrelated library, presumably a decompilation artifact standing in
     * for a literal tab -- confirm and replace by the literal.
     */
    private static final String RULE_DELIMITER = LinearClassifier.TEXT_SERIALIZATION_DELIMITER;

    /**
     * Placeholder used in rule patterns for the annotation position.
     * NOTE(review): this refers to an SLF4J constant, presumably a decompilation artifact standing in for the literal
     * asterisk -- confirm and replace by the literal.
     */
    private static final String WILDCARD = Marker.ANY_MARKER;

    /** Default input file used by {@link #main(String[])} when no path is supplied on the command line. */
    private static final String DEFAULT_MAIN_INPUT = "/Users/pk/Dropbox/Uni/Dissertation_LocationLab/LGL-converted/2-validation/text_44148889.txt";

    /** Context rules (pattern to category); declared after the constants above because loading uses them. */
    private static final Map<String, String> rules = readRules(ContextClassifier.class.getResourceAsStream("/perLocContexts.csv"));

    private final ClassificationMode mode;

    /** Strategy used by {@link ContextClassifier#classify(List, String)}. */
    public enum ClassificationMode {
        /** Every annotation is classified on its own context only. */
        ISOLATED,
        /** Results of all annotations sharing the same value are merged and assigned to each of them. */
        PROPAGATION
    }

    /** An annotation together with the category entries which were determined for it. */
    public static class ClassifiedAnnotation extends ImmutableAnnotation {
        private final CategoryEntries categoryEntries;

        /**
         * @param annotation The annotation whose start position, value and tag are copied.
         * @param categoryEntries The classification result to attach.
         */
        public ClassifiedAnnotation(Annotation annotation, CategoryEntries categoryEntries) {
            super(annotation.getStartPosition(), annotation.getValue(), annotation.getTag());
            this.categoryEntries = categoryEntries;
        }

        /** @return The classification result attached to this annotation. */
        public CategoryEntries getCategoryEntries() {
            return this.categoryEntries;
        }

        @Override
        public String toString() {
            StringBuilder builder = new StringBuilder();
            builder.append("ClassifiedAnnotation [classification=").append(this.categoryEntries);
            // the "=" after startPosition was missing, which made this field inconsistent with the others
            builder.append(", startPosition=").append(getStartPosition());
            builder.append(", tag=").append(getTag());
            builder.append(", value=").append(getValue());
            // NOTE(review): closing token comes from an unrelated library constant, presumably a decompilation
            // artifact for the literal closing bracket -- confirm and replace by the literal.
            builder.append(DefaultExpressionEngine.DEFAULT_ATTRIBUTE_END);
            return builder.toString();
        }
    }

    /**
     * @param classificationMode The mode which determines how {@link #classify(List, String)} combines results.
     */
    public ContextClassifier(ClassificationMode classificationMode) {
        this.mode = classificationMode;
    }

    /**
     * Reads the context rules from the given stream. Lines starting with {@code #} and lines which do not consist of
     * exactly two delimiter-separated fields are skipped.
     *
     * @param inputStream The stream providing the rule lines.
     * @return A map from rule pattern to category name.
     */
    private static Map<String, String> readRules(InputStream inputStream) {
        final Map<String, String> loadedRules = CollectionHelper.newHashMap();
        FileHelper.performActionOnEveryLine(inputStream, new LineAction() {
            @Override
            public void performAction(String str, int i) {
                if (str.startsWith("#")) {
                    return;
                }
                String[] fields = str.split(RULE_DELIMITER);
                if (fields.length == 2) {
                    loadedRules.put(fields[0], fields[1]);
                }
            }
        });
        LOGGER.debug("Loaded {} context rules", loadedRules.size());
        return loadedRules;
    }

    /**
     * Classifies a single annotation by matching every rule against the annotation's context. Each matching rule adds
     * its category with weight 1 to the result. Supported pattern forms:
     * <ul>
     * <li>{@code "* words"}: the right context of as many tokens as {@code words} has must equal {@code words}.</li>
     * <li>{@code "words *"}: the left context of as many tokens as {@code words} has must equal {@code words}.</li>
     * <li>other patterns starting with the wildcard: the number of wildcards gives the size of the right context
     * window; the rule matches if any token in this window equals the pattern's word.</li>
     * <li>other patterns ending with the wildcard: same, using the left context window.</li>
     * </ul>
     * All comparisons ignore case. Patterns which fit none of these forms are reported with a warning.
     *
     * @param str The text in which the annotation occurs.
     * @param annotation The annotation to classify.
     * @return The category entries, with probabilities computed and sorted.
     */
    public CategoryEntries classify(String str, Annotation annotation) {
        CategoryEntriesMap result = new CategoryEntriesMap();
        for (Map.Entry<String, String> ruleEntry : rules.entrySet()) {
            String rule = ruleEntry.getKey();
            boolean matches = false;
            if (rule.startsWith("* ")) {
                String expected = rule.substring(2);
                String context = getRightContext(annotation, str, expected.split("\\s").length);
                matches = expected.equalsIgnoreCase(context);
            } else if (rule.endsWith(" *")) {
                // Mirror image of the prefix form above. The check used to look for "* " (asterisk, then space),
                // although the two characters stripped below are the trailing " *"; rules like "mayor of *"
                // therefore never reached this branch.
                String expected = rule.substring(0, rule.length() - 2);
                String context = getLeftContext(annotation, str, expected.split("\\s").length);
                matches = expected.equalsIgnoreCase(context);
            } else if (rule.startsWith(WILDCARD)) {
                // NOTE(review): the regex removes every whitespace character (plus asterisks directly before it),
                // so a multi-word rule of this form would be compared as one joined token -- confirm such rules
                // are single-word.
                String expected = rule.replaceAll("\\**\\s", "");
                matches = containsIgnoreCase(getRightContexts(annotation, str, countAsteriscs(rule)), expected);
            } else if (rule.endsWith(WILDCARD)) {
                // NOTE(review): same whitespace-stripping caveat as for the right window form above.
                String expected = rule.replaceAll("\\s\\**", "");
                matches = containsIgnoreCase(getLeftContexts(annotation, str, countAsteriscs(rule)), expected);
            } else {
                LOGGER.warn("rule {} cannot be interpreted.", rule);
            }
            if (matches) {
                result.add(ruleEntry.getValue(), 1.0d);
            }
        }
        result.computeProbabilities();
        result.sort();
        return result;
    }

    /**
     * @return {@code true} if any element of the collection equals the given string, ignoring case.
     */
    private boolean containsIgnoreCase(Collection<String> collection, String str) {
        for (String candidate : collection) {
            if (candidate.equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return The number of wildcard occurrences in the given string.
     */
    private int countAsteriscs(String str) {
        return str.length() - str.replace(WILDCARD, "").length();
    }

    /**
     * Classifies all given annotations according to the mode of this classifier. In {@link ClassificationMode#ISOLATED}
     * mode, each annotation receives the result of {@link #classify(String, Annotation)}. In
     * {@link ClassificationMode#PROPAGATION} mode, the results of all annotations with the same value are merged, and
     * every annotation receives the merged result for its value. For any other mode value the returned list is empty.
     *
     * @param list The annotations to classify.
     * @param str The text in which the annotations occur.
     * @return The classified annotations, in the order of the input list.
     */
    public List<ClassifiedAnnotation> classify(List<? extends Annotation> list, String str) {
        List<ClassifiedAnnotation> classified = CollectionHelper.newArrayList();
        if (this.mode == ClassificationMode.ISOLATED) {
            for (Annotation annotation : list) {
                classified.add(new ClassifiedAnnotation(annotation, classify(str, annotation)));
            }
        } else if (this.mode == ClassificationMode.PROPAGATION) {
            // the lazy map supplies an empty entries map on first access for a value
            LazyMap<String, CategoryEntriesMap> mergedByValue = LazyMap.create(new Factory<CategoryEntriesMap>() {
                @Override
                public CategoryEntriesMap create() {
                    return new CategoryEntriesMap();
                }
            });
            for (Annotation annotation : list) {
                String value = annotation.getValue();
                CategoryEntriesMap collected = mergedByValue.get(value);
                mergedByValue.put(value, CategoryEntriesMap.merge(collected, classify(str, annotation)));
            }
            for (CategoryEntriesMap merged : mergedByValue.values()) {
                merged.computeProbabilities();
                merged.sort();
            }
            for (Annotation annotation : list) {
                classified.add(new ClassifiedAnnotation(annotation, mergedByValue.get(annotation.getValue())));
            }
        }
        return classified;
    }

    /**
     * Extracts the text directly to the left of the annotation. Scanning starts at the character before the
     * annotation's start position and moves towards the beginning of the text; it stops at a newline, at a character
     * which {@link StringHelper#isPunctuation(char)} reports as punctuation, or as soon as {@code i} spaces have been
     * counted (a space immediately before the annotation is not counted).
     *
     * @param annotation The annotation.
     * @param str The text in which the annotation occurs.
     * @param i The number of space-separated tokens to extract.
     * @return The trimmed left context, or {@code null} if extraction failed with an exception (for example when
     *         the annotation's position lies outside the text).
     */
    public static String getLeftContext(Annotation annotation, String str, int i) {
        try {
            StringBuilder reversed = new StringBuilder();
            int spaces = 0;
            int scanStart = annotation.getStartPosition() - 1;
            for (int index = scanStart; index >= 0; index--) {
                char current = str.charAt(index);
                if (current == ' ' && index < scanStart) {
                    spaces++;
                }
                if (spaces >= i || current == '\n' || StringHelper.isPunctuation(current)) {
                    break;
                }
                reversed.append(current);
            }
            // characters were collected back to front
            return StringHelper.reverseString(reversed.toString()).trim();
        } catch (Exception e) {
            // best effort: callers treat null as "no context"; leave a trace instead of failing silently
            LOGGER.debug("Could not determine left context for {}", annotation, e);
            return null;
        }
    }

    /**
     * Extracts the text directly to the right of the annotation. Scanning starts at the annotation's end position and
     * moves towards the end of the text; it stops at a newline, at a character which
     * {@link StringHelper#isPunctuation(char)} reports as punctuation, or as soon as {@code i} spaces have been counted
     * (a space at the end position itself is not counted).
     *
     * @param annotation The annotation.
     * @param str The text in which the annotation occurs.
     * @param i The number of space-separated tokens to extract.
     * @return The trimmed right context, or {@code null} if extraction failed with an exception.
     */
    public static String getRightContext(Annotation annotation, String str, int i) {
        try {
            StringBuilder collected = new StringBuilder();
            int spaces = 0;
            int scanStart = annotation.getEndPosition();
            for (int index = scanStart; index < str.length(); index++) {
                char current = str.charAt(index);
                if (current == ' ' && index > scanStart) {
                    spaces++;
                }
                if (spaces >= i || current == '\n' || StringHelper.isPunctuation(current)) {
                    break;
                }
                collected.append(current);
            }
            return collected.toString().trim();
        } catch (Exception e) {
            // best effort: callers treat null as "no context"; leave a trace instead of failing silently
            LOGGER.debug("Could not determine right context for {}", annotation, e);
            return null;
        }
    }

    /**
     * @return The whitespace-separated tokens of {@link #getLeftContext(Annotation, String, int)}, or an empty list
     *         if no left context could be determined.
     */
    public static List<String> getLeftContexts(Annotation annotation, String str, int i) {
        return splitContext(getLeftContext(annotation, str, i));
    }

    /**
     * @return The whitespace-separated tokens of {@link #getRightContext(Annotation, String, int)}, or an empty list
     *         if no right context could be determined.
     */
    public static List<String> getRightContexts(Annotation annotation, String str, int i) {
        return splitContext(getRightContext(annotation, str, i));
    }

    /** Splits a context at whitespace; a {@code null} context yields an empty list instead of an exception. */
    private static List<String> splitContext(String context) {
        if (context == null) {
            return new ArrayList<String>();
        }
        return Arrays.asList(context.split("\\s"));
    }

    /**
     * Demo entry point: reads a text file, strips its HTML tags, tags it and prints the classified annotations.
     *
     * @param strArr Optional; the first element is used as input file path, otherwise a built-in default path.
     */
    public static void main(String[] strArr) {
        String inputPath = strArr != null && strArr.length > 0 ? strArr[0] : DEFAULT_MAIN_INPUT;
        String stripHtmlTags = HtmlHelper.stripHtmlTags(FileHelper.readFileToString(inputPath));
        ContextClassifier classifier = new ContextClassifier(ClassificationMode.PROPAGATION);
        CollectionHelper.print(classifier.classify(new EntityPreprocessingTagger().getAnnotations(stripHtmlTags), stripHtmlTags));
    }
}
