package ws.palladian.extraction.keyphrase.evaluation;

import com.aliasi.util.Strings;
import edu.stanford.nlp.ling.CoreLabel;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.englishStemmer;
import ws.palladian.extraction.keyphrase.Keyphrase;
import ws.palladian.extraction.keyphrase.KeyphraseExtractor;
import ws.palladian.extraction.keyphrase.extractors.MachineLearningBasedExtractor;
import ws.palladian.extraction.keyphrase.extractors.RuleBasedExtractor;
import ws.palladian.extraction.keyphrase.temp.Dataset2;
import ws.palladian.extraction.keyphrase.temp.DatasetHelper;
import ws.palladian.extraction.keyphrase.temp.DatasetItem;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/keyphrase/evaluation/KeyphraseExtractorEvaluator.class */
/**
 * Evaluates registered {@link KeyphraseExtractor}s on {@link Dataset2}s, either with a fixed
 * train/test split or via cross validation, and logs the aggregated
 * {@link KeyphraseExtractorEvaluationResult} for each extractor.
 *
 * <p>An extracted keyphrase counts as correct when it equals (ignoring case) one of the item's
 * real keyphrases or its stemmed form; the extracted value is additionally tried with spaces
 * removed and in stemmed form.</p>
 *
 * <p>NOTE(review): all stemming goes through the single {@code stemmer} instance field, so an
 * evaluator instance is presumably not meant to be shared between threads.</p>
 */
public class KeyphraseExtractorEvaluator {
    private static final Logger LOGGER = LoggerFactory.getLogger(KeyphraseExtractorEvaluator.class);
    private final List<KeyphraseExtractor> extractors = new ArrayList<KeyphraseExtractor>();
    private final SnowballStemmer stemmer = new englishStemmer();

    /**
     * Registers an extractor to be included in subsequent {@code evaluate} calls.
     *
     * @param keyphraseExtractor the extractor to add
     */
    public void addExtractor(KeyphraseExtractor keyphraseExtractor) {
        this.extractors.add(keyphraseExtractor);
    }

    /**
     * Evaluates every registered extractor on the given dataset using cross validation.
     *
     * @param dataset2 the dataset that is split into training and test folds
     * @param i the fold parameter handed to {@link DatasetHelper#crossValidate}
     */
    public void evaluate(Dataset2 dataset2, int i) {
        LOGGER.info("dataset " + dataset2);
        for (KeyphraseExtractor extractor : this.extractors) {
            evaluate(extractor, dataset2, i);
        }
    }

    /**
     * Evaluates every registered extractor with a fixed split: each extractor is trained on the
     * first dataset and tested on the second, and its result is logged.
     *
     * @param dataset2 the training dataset
     * @param dataset22 the test dataset
     */
    public void evaluate(Dataset2 dataset2, Dataset2 dataset22) {
        for (KeyphraseExtractor extractor : this.extractors) {
            LOGGER.info("evaluating " + extractor.toString());
            KeyphraseExtractorEvaluationResult result = new KeyphraseExtractorEvaluationResult();
            extractor.train(dataset2);
            test(extractor, dataset22, result);
            LOGGER.info(result.toString());
        }
    }

    /**
     * Cross-validates a single extractor. For every split delivered by
     * {@link DatasetHelper#crossValidate} the extractor is trained on element 0 and tested on
     * element 1; all test results are accumulated into one result object, which is logged at
     * the end.
     */
    private void evaluate(KeyphraseExtractor extractor, Dataset2 dataset, int numFolds) {
        LOGGER.info("evaluating " + extractor.toString());
        Iterator<Dataset2[]> folds = DatasetHelper.crossValidate(dataset, numFolds);
        KeyphraseExtractorEvaluationResult result = new KeyphraseExtractorEvaluationResult();
        int fold = 1;
        while (folds.hasNext()) {
            LOGGER.debug("fold " + fold + CoreLabel.TAG_SEPARATOR + numFolds);
            fold++;
            Dataset2[] split = folds.next();
            Dataset2 trainData = split[0];
            Dataset2 testData = split[1];
            extractor.train(trainData);
            test(extractor, testData, result);
        }
        LOGGER.info(result.toString());
    }

    /**
     * Runs the extractor on every item of the dataset and adds per-item precision, recall and
     * the number of assigned keyphrases to {@code result}.
     */
    private void test(KeyphraseExtractor extractor, Dataset2 dataset, KeyphraseExtractorEvaluationResult result) {
        extractor.startExtraction();
        int itemIndex = 0;
        Iterator<DatasetItem> items = dataset.iterator();
        while (items.hasNext()) {
            DatasetItem item = items.next();

            // Real (gold) keyphrases, lower-cased and de-duplicated.
            Set<String> realKeyphrases = new HashSet<String>();
            for (String category : item.getCategories()) {
                realKeyphrases.add(category.toLowerCase());
            }
            int realCount = realKeyphrases.size();

            // Accept both the stemmed and the unstemmed form of each real keyphrase.
            Set<String> acceptable = stem(realKeyphrases);
            acceptable.addAll(realKeyphrases);

            List<Keyphrase> assigned = extractor.extract(FileHelper.readFileToString(item.getFile()));

            // Drop extracted keyphrases whose value occurred before, so duplicates are not
            // counted twice.
            Set<String> seenValues = new HashSet<String>();
            Iterator<Keyphrase> assignedIterator = assigned.iterator();
            while (assignedIterator.hasNext()) {
                if (!seenValues.add(assignedIterator.next().getValue())) {
                    assignedIterator.remove();
                }
            }

            int assignedCount = assigned.size();
            int correctCount = 0;
            for (Keyphrase keyphrase : assigned) {
                if (matchesAny(acceptable, keyphrase.getValue())) {
                    correctCount++;
                }
            }

            // Divide as float (int / int would truncate to 0 or 1) and treat an empty
            // denominator as 0 instead of failing or producing NaN.
            float precision = assignedCount == 0 ? 0.0f : (float) correctCount / assignedCount;
            float recall = realCount == 0 ? 0.0f : (float) correctCount / realCount;

            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("item " + itemIndex + CoreLabel.TAG_SEPARATOR + dataset.size());
                LOGGER.debug("real keyphrases: " + realKeyphrases);
                LOGGER.debug("assigned keyphrases: " + assigned);
                LOGGER.debug("real: " + realCount + " assigned: " + assignedCount + " correct: " + correctCount);
                LOGGER.debug("pr: " + precision + " rc: " + recall);
                LOGGER.debug("----------------------------------------------------------");
            }
            result.addTestResult(precision, recall, assignedCount);
            itemIndex++;
        }
        extractor.reset();
    }

    /**
     * Checks whether an extracted keyphrase value matches any acceptable keyphrase, ignoring
     * case. The value is tried as is, with spaces removed, stemmed, and stemmed with spaces
     * removed. The variants are computed once rather than once per candidate.
     */
    private boolean matchesAny(Set<String> acceptable, String value) {
        String collapsed = value.replace(Strings.SINGLE_SPACE_STRING, "");
        String stemmed = stem(value);
        String stemmedCollapsed = stem(collapsed);
        for (String candidate : acceptable) {
            if (candidate.equalsIgnoreCase(value)
                    || candidate.equalsIgnoreCase(collapsed)
                    || candidate.equalsIgnoreCase(stemmed)
                    || candidate.equalsIgnoreCase(stemmedCollapsed)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Lower-cases the input, splits it at single spaces, stems every token and concatenates the
     * stems without a separator.
     *
     * @param str the phrase to stem
     * @return the concatenated stems
     */
    private String stem(String str) {
        StringBuilder stems = new StringBuilder();
        for (String token : str.toLowerCase().split(Strings.SINGLE_SPACE_STRING)) {
            this.stemmer.setCurrent(token);
            this.stemmer.stem();
            stems.append(this.stemmer.getCurrent());
        }
        return stems.toString();
    }

    /**
     * Stems every phrase of the given set.
     *
     * @param set the phrases to stem
     * @return a new, modifiable set holding the stemmed phrases
     */
    private Set<String> stem(Set<String> set) {
        Set<String> stemmed = new HashSet<String>();
        for (String phrase : set) {
            stemmed.add(stem(phrase));
        }
        return stemmed;
    }

    /**
     * Development entry point: trains and tests a rule-based and a machine-learning-based
     * extractor on two hard-coded dataset splits.
     *
     * @param strArr command line arguments (unused)
     */
    public static void main(String[] strArr) {
        // NOTE(review): the results of the next three loads are discarded; they are kept only
        // because loadDataset is not visible here and may have effects worth preserving.
        // TODO confirm and remove.
        DatasetHelper.loadDataset(new File("/Users/pk/Dropbox/Uni/Datasets/citeulike180/citeulike180index.txt"));
        DatasetHelper.loadDataset(new File("/Users/pk/Dropbox/Uni/Datasets/SemEval2010/semEvalTrainCombinedIndex.txt"));
        DatasetHelper.loadDataset(new File("/Users/pk/Dropbox/Uni/Datasets/SemEval2010/semEvalTestCombinedIndex.txt"));
        Dataset2 trainData = DatasetHelper.loadDataset(new File("/Users/pk/Desktop/delicioust140/split_aa.txt"));
        Dataset2 testData = DatasetHelper.loadDataset(new File("/Users/pk/Desktop/delicioust140/split_ab.txt"));
        KeyphraseExtractorEvaluator evaluator = new KeyphraseExtractorEvaluator();
        evaluator.addExtractor(new RuleBasedExtractor());
        evaluator.addExtractor(new MachineLearningBasedExtractor());
        evaluator.evaluate(trainData, testData);
    }
}
