package ws.palladian.classification.language.evaluation;

import com.aliasi.util.Strings;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.language.PalladianLangDetect;
import ws.palladian.classification.text.evaluation.Dataset;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.math.MathHelper;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/classification/language/evaluation/LanguageDetectionEvaluation.class */
/**
 * Evaluates the accuracy of {@link PalladianLangDetect} on a labelled dataset.
 *
 * <p>The log output still contains columns for JLangDetect, Google and Alchemy; none of these
 * detectors is invoked by this class, so their results are always reported as "false" /
 * "0 classified".
 */
public class LanguageDetectionEvaluation {
    private static final Logger LOGGER = LoggerFactory.getLogger(LanguageDetectionEvaluation.class);
    private static final String PALLADIAN_MODEL_PATH = "";

    /**
     * Classifies every entry of the dataset index file and logs per-line results followed by an
     * accuracy summary.
     *
     * <p>Each line of the index file is split with {@code dataset.getSeparationString()}; the
     * first field is the text (or, if {@code dataset.isFirstFieldLink()} is true, a path relative
     * to {@code dataset.getRootPath()} from which the text is read) and the second field is the
     * expected language code, compared against {@code Language.getIso6391()}.
     *
     * @param dataset the dataset description (index file path, separator, link flag, root path)
     * @param set currently unused; kept so that existing callers continue to compile
     * @param num maximum number of leading characters of each text passed to the classifier, or
     *        {@code null} to classify the complete text
     */
    public void evaluate(Dataset dataset, Set<String> set, Integer num) {
        LOGGER.info("evaluate JLangDetect vs. Google vs. Alchemy vs. Palladian");
        StopWatch stopWatch = new StopWatch();
        PalladianLangDetect palladianLangDetect = new PalladianLangDetect(PALLADIAN_MODEL_PATH);
        List<String> lines = FileHelper.readFileToArray(dataset.getPath());
        int totalLines = lines.size();
        // Note: String.split interprets the separator as a regular expression.
        String separator = dataset.getSeparationString();

        int palladianCorrect = 0;
        int palladianClassified = 0;
        int lineNumber = 0;
        for (String line : lines) {
            lineNumber++;
            String[] fields = line.split(separator);
            if (fields.length < 2) {
                // A malformed index line must not abort the whole evaluation run.
                LOGGER.warn("line {}: expected at least 2 fields but found {}, skipping", lineNumber, fields.length);
                continue;
            }
            String text = fields[0];
            String expectedLanguage = fields[1];
            if (dataset.isFirstFieldLink()) {
                text = FileHelper.readFileToString(dataset.getRootPath() + text);
            }
            if (num != null) {
                text = text.substring(0, Math.min(num.intValue(), text.length()));
            }

            Language detected = palladianLangDetect.classify(text);
            boolean palladianIsCorrect = false;
            // Check for null before dereferencing: an unclassified text counts as neither
            // classified nor correct.
            if (detected != null) {
                palladianClassified++;
                if (expectedLanguage.equals(detected.getIso6391())) {
                    palladianCorrect++;
                    palladianIsCorrect = true;
                }
            }

            // Floating-point division; integer division would report 0% until the last line.
            double progress = MathHelper.round(100.0d * lineNumber / totalLines, 2);
            LOGGER.info("line {}, {}% ({}) -> jlang: false | google: false | alchemy: false | palladian: {}",
                    lineNumber, progress, expectedLanguage, palladianIsCorrect);
        }

        LOGGER.info("evaluated over {} strings in {}", totalLines, stopWatch.getElapsedTimeString());
        // The following three detectors are not run here, hence zero correct / zero classified.
        LOGGER.info("Accuracy JLangDetect: {}% (0 classified)", accuracyPercent(0, 0));
        LOGGER.info("Accuracy Google     : {}% (0 classified)", accuracyPercent(0, 0));
        LOGGER.info("Accuracy Alchemy    : {}% (0 classified)", accuracyPercent(0, 0));
        LOGGER.info("Accuracy Palladian  : {}% ({} classified)",
                accuracyPercent(palladianCorrect, palladianClassified), palladianClassified);
    }

    /**
     * Computes {@code correct / classified} as a percentage rounded to two digits.
     *
     * @param correct number of correctly classified items
     * @param classified number of items for which a classification was produced
     * @return the percentage, or {@code 0.0} when nothing was classified (avoids division by zero)
     */
    private static double accuracyPercent(int correct, int classified) {
        if (classified == 0) {
            return 0.0d;
        }
        return MathHelper.round(100.0d * correct / classified, 2);
    }

    /**
     * Runs the evaluation on a hard-coded JRC language corpus index, classifying only the first
     * 30 characters of each document.
     *
     * @param strArr command line arguments (ignored)
     */
    public static void main(String[] strArr) {
        LanguageDetectionEvaluation languageDetectionEvaluation = new LanguageDetectionEvaluation();
        Set<String> possibleLanguages = new HashSet<String>();
        possibleLanguages.add("da");
        possibleLanguages.add("de");
        possibleLanguages.add("el");
        possibleLanguages.add("en");
        possibleLanguages.add("es");
        possibleLanguages.add("fi");
        possibleLanguages.add("fr");
        possibleLanguages.add("it");
        possibleLanguages.add("nl");
        possibleLanguages.add("pt");
        possibleLanguages.add("sv");
        Dataset dataset = new Dataset();
        dataset.setPath("/home/pk/datasets/JRCLanguageCorpus/indexAll22Languages_ipc100_split2.txt");
        dataset.setFirstFieldLink(true);
        dataset.setSeparationString(Strings.SINGLE_SPACE_STRING);
        languageDetectionEvaluation.evaluate(dataset, possibleLanguages, 30);
    }
}
