package ws.palladian.extraction.entity.tagger;

import com.aliasi.util.Strings;
import de.julielab.jnet.tagger.JNETException;
import de.julielab.jnet.tagger.NETagger;
import de.julielab.jnet.tagger.Sentence;
import de.julielab.jnet.tagger.Tags;
import de.julielab.jnet.utils.Utils;
import edu.stanford.nlp.classify.LinearClassifier;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import ws.palladian.extraction.entity.Annotations;
import ws.palladian.extraction.entity.ContextAnnotation;
import ws.palladian.extraction.entity.FileFormatParser;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.TrainableNamedEntityRecognizer;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.processing.features.Annotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/entity/tagger/JulieNer.class */
/**
 * Trainable named entity recognizer that delegates training and prediction to the JNET
 * {@link NETagger}.
 *
 * <p>All intermediate data is exchanged with the tagger through fixed files below
 * {@code data/temp/}. Because those paths are shared, concurrent use of several instances
 * in the same working directory would overwrite each other's files.</p>
 */
public class JulieNer extends TrainableNamedEntityRecognizer {

    /** Location the default tagger configuration is written to before training. */
    private static final String CONFIG_FILE = "data/temp/julieNerConfig.config";

    /** Training data in the slash format consumed by the tagger. */
    private static final String TRAINING_FILE = "data/temp/julieTraining.txt";

    /** List of tags (one per line) handed to the tagger for training. */
    private static final String TAGS_FILE = "data/temp/julieTags.txt";

    /** Raw input text of a tagging request. */
    private static final String INPUT_TEXT_FILE = "data/temp/julieInputText.txt";

    /** Input text of a tagging request converted to column format. */
    private static final String INPUT_COLUMN_FILE = "data/temp/julieInputTextColumn.txt";

    /** Input text of a tagging request converted to slash format. */
    private static final String INPUT_SLASH_FILE = "data/temp/julieTrainingSlash.txt";

    /** Prediction output; rewritten in place by {@link NerHelper#alignContent}. */
    private static final String PREDICTION_FILE = "data/temp/juliePredictionOutput.txt";

    /** Untouched copy of the prediction output, kept next to the aligned one. */
    private static final String PREDICTION_ORIGINAL_FILE = "data/temp/juliePredictionOutput_original.txt";

    /** Location the tagged input text is written to after each tagging request. */
    private static final String TAGGED_OUTPUT_FILE = "data/test/ner/julieOutput.txt";

    /** Regular expression matching the sentence-final token ({@code " .|O "}) in slash format. */
    private static final String SENTENCE_END_REGEX = " \\.\\|O ";

    /** Sentence-final token plus line break, re-appended to every sentence after splitting. */
    private static final String SENTENCE_END = " .|O\n";

    /** Default tagger configuration used by {@link #train(String, String)}. */
    private static final String DEFAULT_CONFIG = "pos_feat_enabled = false\n"
            + "pos_feat_unit = pos\n"
            + "pos_feat_position = 1\n"
            + "pos_begin_flag = false\n"
            + "offset_conjunctions = (-1)(1)\n"
            + "gap_character = @\n"
            + "stemming_enabled = true\n"
            + "feat_wc_enabled = true\n"
            + "feat_bwc_enabled = true\n"
            + "feat_bioregexp_enabled = true\n";

    /** Content of the configuration file written before training with the default settings. */
    private final String configFileContent;

    /** Tagger holding the currently loaded model; {@code null} until {@link #loadModel(String)} succeeds. */
    private NETagger tagger;

    /** Creates a recognizer that uses the default tagger configuration. */
    public JulieNer() {
        this.configFileContent = DEFAULT_CONFIG;
    }

    /** Runs {@link #demo(String)} on a fixed sample sentence. */
    public void demo() {
        demo("John J. Smith and the Nexus One location mention Seattle in the text John J. Smith lives in Seattle. The iphone 4 is a mobile phone.");
    }

    /**
     * Trains a model on the bundled sample data set, loads it and prints the tagged text
     * to standard output.
     *
     * @param str the text to tag
     */
    public void demo(String str) {
        train("data/datasets/ner/sample/trainingColumn.tsv", "data/temp/personPhoneCity.mod");
        loadModel("data/temp/personPhoneCity.mod.gz");
        System.out.println(tag(str));
    }

    /**
     * @return the file ending of model files, taken from {@link CompressorStreamFactory#GZIP}
     */
    @Override // ws.palladian.extraction.entity.TrainableNamedEntityRecognizer
    public String getModelFileEnding() {
        return CompressorStreamFactory.GZIP;
    }

    /**
     * @return always {@code true}
     */
    @Override // ws.palladian.extraction.entity.TrainableNamedEntityRecognizer
    public boolean setsModelFileEndingAutomatically() {
        return true;
    }

    /**
     * Loads a previously trained model. If the given path does not end with
     * {@code "." + getModelFileEnding()}, that ending is appended first.
     *
     * @param str path to the model file, with or without the model file ending
     * @return {@code true} if the model was read, {@code false} if reading failed (the
     *         failure is logged and a previously loaded model stays in place)
     */
    @Override // ws.palladian.extraction.entity.TrainableNamedEntityRecognizer
    public boolean loadModel(String str) {
        StopWatch stopWatch = new StopWatch();
        String ending = "." + getModelFileEnding();
        String modelPath = str.endsWith(ending) ? str : str + ending;
        File modelFile = new File(modelPath);
        NETagger loadedTagger = new NETagger();
        try {
            loadedTagger.readModel(modelFile.toString());
            // Only replace the active tagger once the model has been read completely.
            this.tagger = loadedTagger;
            LOGGER.info("model " + modelFile.toString() + " successfully loaded in " + stopWatch.getElapsedTimeString());
            return true;
        } catch (Exception e) {
            LOGGER.error(getName() + " error in loading model: " + e.getMessage());
            return false;
        }
    }

    /**
     * Tags the given text with the loaded model and returns the recognized entities.
     *
     * <p>The text is written to disk, converted to the tagger's slash format, predicted,
     * aligned with the original text and parsed back into annotations. Nested annotations
     * are removed and the result is sorted. As a side effect the tagged text is written to
     * {@code data/test/ner/julieOutput.txt}.</p>
     *
     * @param str the text to tag
     * @return an unmodifiable view of the recognized annotations
     * @throws IllegalStateException if no model has been loaded via {@link #loadModel(String)}
     */
    @Override // ws.palladian.extraction.entity.NamedEntityRecognizer, ws.palladian.processing.Tagger
    public List<Annotation> getAnnotations(String str) {
        if (this.tagger == null) {
            // Fail with a clear message instead of a bare NullPointerException further down.
            throw new IllegalStateException(getName() + ": no model loaded, call loadModel(String) first");
        }

        // Convert the raw text into the one-sentence-per-line slash format the tagger reads.
        FileHelper.writeToFile(INPUT_TEXT_FILE, str);
        FileFormatParser.textToColumn(INPUT_TEXT_FILE, INPUT_COLUMN_FILE, Strings.SINGLE_SPACE_STRING);
        FileFormatParser.columnToSlash(INPUT_COLUMN_FILE, INPUT_SLASH_FILE, Strings.SINGLE_SPACE_STRING, "|");

        ArrayList<String> lines = Utils.readFile(new File(INPUT_SLASH_FILE));
        ArrayList<Sentence> sentences = new ArrayList<>();
        for (String line : lines) {
            try {
                sentences.add(this.tagger.PPDtoUnits(line));
            } catch (JNETException e) {
                // A sentence that cannot be parsed is skipped; the remaining ones are still tagged.
                LOGGER.error(getName() + " error in creating annotations: " + e.getMessage());
            }
        }

        File predictionFile = new File(PREDICTION_FILE);
        try {
            ArrayList<String> prediction = this.tagger.predictIOB(sentences, false);
            Utils.writeFile(predictionFile, prediction);
            Utils.writeFile(new File(PREDICTION_ORIGINAL_FILE), prediction);
            NerHelper.alignContent(predictionFile, str);
        } catch (Exception e) {
            LOGGER.error(getName() + " error in creating annotations: " + e.getMessage());
        }

        String alignedText = NerHelper.alignContentText(FileHelper.readFileToString(predictionFile.getPath()), str);
        Annotations<ContextAnnotation> annotations = FileFormatParser.getAnnotationsFromXmlText(alignedText);
        annotations.removeNested();
        annotations.sort();
        FileHelper.writeToFile(TAGGED_OUTPUT_FILE, tagText(str, annotations));
        return Collections.unmodifiableList(annotations);
    }

    /**
     * Writes all tags found in a column-formatted training file to the tags file, one tag
     * per line. The tag {@code "O"} is added if the training file does not contain it.
     *
     * @param str  path to the column-formatted training file
     * @param str2 column separator used in that file
     * @return the written tags file
     */
    private File createTagsFile(String str, String str2) {
        Set<String> tags = FileFormatParser.getTagsFromColumnFile(str, str2);
        StringBuilder content = new StringBuilder();
        for (String tag : tags) {
            content.append(tag).append(FileHelper.NEWLINE_CHARACTER);
        }
        if (!tags.contains("O")) {
            content.append("O").append(FileHelper.NEWLINE_CHARACTER);
        }
        FileHelper.writeToFile(TAGS_FILE, content);
        return new File(TAGS_FILE);
    }

    /**
     * Trains a model with the default configuration of this recognizer.
     *
     * @param str  path to the column-formatted training file
     * @param str2 path the trained model is written to
     * @return {@code true} if training completed, {@code false} if the training data could
     *         not be prepared
     */
    @Override // ws.palladian.extraction.entity.TrainableNamedEntityRecognizer
    public boolean train(String str, String str2) {
        FileHelper.writeToFile(CONFIG_FILE, this.configFileContent);
        return train(str, str2, CONFIG_FILE);
    }

    /**
     * Trains a model using an explicit tagger configuration file.
     *
     * <p>The training file is converted to slash format and rewritten so that every
     * sentence (terminated by the token {@code ".|O"}) is on its own line before it is
     * handed to the tagger.</p>
     *
     * @param str  path to the column-formatted training file
     * @param str2 path the trained model is written to
     * @param str3 path to the tagger configuration file; {@code null} or the empty string
     *             selects the tagger's built-in defaults
     * @return {@code true} if training completed, {@code false} if the training data could
     *         not be prepared
     */
    public boolean train(String str, String str2, String str3) {
        FileFormatParser.removeWhiteSpaceInFirstColumn(str, TRAINING_FILE, "_");
        FileFormatParser.columnToSlash(TRAINING_FILE, TRAINING_FILE, LinearClassifier.TEXT_SERIALIZATION_DELIMITER, "|");

        // Put every sentence on its own line; the split consumes the terminator, so it is re-appended.
        String[] rawSentences = FileHelper.readFileToString(TRAINING_FILE).split(SENTENCE_END_REGEX);
        // NOTE(review): FileWriter uses the platform default charset - confirm this matches FileHelper.
        try (FileWriter writer = new FileWriter(TRAINING_FILE)) {
            for (String rawSentence : rawSentences) {
                if (rawSentence.trim().length() > 0) {
                    writer.append(rawSentence).append(SENTENCE_END);
                }
            }
        } catch (IOException e) {
            // The file has been truncated at this point, so training on it would be meaningless.
            LOGGER.error(getName() + " error in writing training data: " + e.getMessage());
            return false;
        }

        File trainingFile = new File(TRAINING_FILE);
        File tagsFile = createTagsFile(str, LinearClassifier.TEXT_SERIALIZATION_DELIMITER);
        File configFile = (str3 != null && str3.length() > 0) ? new File(str3) : null;

        ArrayList<String> lines = Utils.readFile(trainingFile);
        ArrayList<Sentence> sentences = new ArrayList<>();
        Tags tags = new Tags(tagsFile.toString());
        NETagger trainer = configFile != null ? new NETagger(configFile) : new NETagger();
        for (String line : lines) {
            try {
                sentences.add(trainer.PPDtoUnits(line));
            } catch (JNETException e) {
                // A sentence that cannot be parsed is skipped; training continues with the rest.
                LOGGER.error(getName() + " error in reading training sentence: " + e.getMessage());
            }
        }

        trainer.train(sentences, tags);
        trainer.writeModel(str2);
        return true;
    }

    /**
     * @return the display name of this recognizer
     */
    @Override // ws.palladian.extraction.entity.NamedEntityRecognizer
    public String getName() {
        return "Julie NER";
    }

    /**
     * Trains a model on a small CoNLL sample, evaluates it on the TUD 2011 test set and
     * prints the MUC and exact-match results to standard output.
     *
     * @param strArr ignored
     * @throws Exception if training or evaluation fails
     */
    public static void main(String[] strArr) throws Exception {
        JulieNer julieNer = new JulieNer();
        julieNer.train("data/datasets/ner/conll/training_verysmall.txt", "data/temp/julieNER.model");
        julieNer.loadModel("data/temp/julieNER.model");
        EvaluationResult evaluate = julieNer.evaluate("data/datasets/ner/tud/tud2011_test.txt", TaggingFormat.COLUMN);
        System.out.println(evaluate.getMUCResultsReadable());
        System.out.println(evaluate.getExactMatchResultsReadable());
    }
}
