package ws.palladian.extraction.entity;

import com.aliasi.util.Strings;
import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.classification.text.evaluation.Dataset;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.extraction.entity.tagger.NerHelper;
import ws.palladian.extraction.feature.TextDocumentPipelineProcessor;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.Tagger;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.Annotation;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.PositionAnnotationFactory;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/entity/NamedEntityRecognizer.class */
/**
 * Base class for named entity recognizers. Concrete subclasses supply {@link #getAnnotations(String)} and
 * {@link #getName()}; this class builds tagging, evaluation against a gold standard, and pipeline processing on top
 * of those.
 */
public abstract class NamedEntityRecognizer extends TextDocumentPipelineProcessor implements Tagger {
    protected static final Logger LOGGER = LoggerFactory.getLogger(NamedEntityRecognizer.class);
    public static final String PROVIDED_FEATURE = "ws.palladian.processing.entity.ner";

    /** Temporary file into which the files of a {@link Dataset} are concatenated before evaluation. */
    private static final String CONCATENATED_EVALUATION_FILE = "data/temp/nerConcatenatedEvaluation.xml";

    /** Format handed to {@link NerHelper#tag} by {@link #tagText(String, List)}; XML unless changed via the setter. */
    private TaggingFormat taggingFormat = TaggingFormat.XML;

    /**
     * Recognizes the entities in the given text.
     *
     * @param str the text to analyze
     * @return the annotations found in the text
     */
    @Override // ws.palladian.processing.Tagger
    public abstract List<? extends Annotation> getAnnotations(String str);

    /**
     * Recognizes the entities in the given text and returns the text with those entities tagged in the currently
     * configured tagging format. The elapsed time is logged at debug level.
     *
     * @param str the text to tag
     * @return the tagged text as produced by {@link #tagText(String, List)}
     */
    public String tag(String str) {
        StopWatch stopWatch = new StopWatch();
        List<? extends Annotation> annotations = getAnnotations(str);
        String taggedText = tagText(str, annotations);
        LOGGER.debug("tagged text in {}", stopWatch.getElapsedTimeString(false));
        return taggedText;
    }

    /**
     * Delegates to {@link NerHelper#tag} with the given text, annotations and this recognizer's tagging format.
     *
     * @param str the text to tag
     * @param list the annotations to apply to the text
     * @return the result of {@link NerHelper#tag}
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public String tagText(String str, List<? extends Annotation> list) {
        return NerHelper.tag(str, list, this.taggingFormat);
    }

    /**
     * Evaluates this recognizer on a dataset. Every line of the file at {@code dataset.getPath()} is split at
     * {@code Strings.SINGLE_SPACE_STRING}; the first token, prefixed with {@code dataset.getRootPath()}, names a file
     * that is passed to {@link FileHelper#concatenateFiles} together with a temporary file. The temporary file is
     * deleted up front and finally evaluated as {@link TaggingFormat#XML}.
     *
     * @param dataset the dataset to evaluate on
     * @return the evaluation result for the concatenated file
     */
    public EvaluationResult evaluate(Dataset dataset) {
        FileHelper.delete(CONCATENATED_EVALUATION_FILE);
        // The target never changes, so create the File handle once instead of once per dataset line.
        File concatenatedFile = new File(CONCATENATED_EVALUATION_FILE);
        for (String line : FileHelper.readFileToArray(dataset.getPath())) {
            String relativePath = line.split(Strings.SINGLE_SPACE_STRING)[0];
            FileHelper.concatenateFiles(concatenatedFile, new File(dataset.getRootPath() + relativePath));
        }
        return evaluate(CONCATENATED_EVALUATION_FILE, TaggingFormat.XML);
    }

    /**
     * Evaluates this recognizer on a tagged file without ignoring any gold standard values.
     *
     * @param str path of the tagged file holding the gold standard
     * @param taggingFormat format of that file
     * @return the evaluation result
     */
    public EvaluationResult evaluate(String str, TaggingFormat taggingFormat) {
        return evaluate(str, taggingFormat, Collections.<String>emptySet());
    }

    /**
     * Evaluates this recognizer on a tagged file. The gold standard annotations and the plain text are both obtained
     * from the file through {@link FileFormatParser}; the text is run through {@link #getAnnotations(String)}, nested
     * annotations are removed from that output, and both annotation lists are sorted before being compared.
     *
     * @param str path of the tagged file holding the gold standard
     * @param taggingFormat format of that file
     * @param set gold standard values to skip during matching, see {@link #evaluate(List, List, Set)}
     * @return the evaluation result
     */
    public EvaluationResult evaluate(String str, TaggingFormat taggingFormat, Set<String> set) {
        Annotations<ContextAnnotation> goldStandard = FileFormatParser.getAnnotations(str, taggingFormat);
        goldStandard.sort();
        // NOTE(review): raw type kept deliberately; the constructor's generic signature is not visible from this
        // file, so parameterizing it here could not be verified.
        Annotations nerAnnotations = new Annotations(getAnnotations(FileFormatParser.getText(str, taggingFormat)));
        nerAnnotations.removeNested();
        nerAnnotations.sort();
        return evaluate(goldStandard, nerAnnotations, set);
    }

    /**
     * Compares recognizer output against a gold standard and records one result entry per finding:
     * <ul>
     * <li>{@code CORRECT}: tagged annotation congruent with a gold annotation and carrying the same tag</li>
     * <li>{@code ERROR1}: tagged annotation that neither matched nor overlapped any gold annotation</li>
     * <li>{@code ERROR2}: gold annotation that no tagged annotation matched or overlapped</li>
     * <li>{@code ERROR3}: congruent with a gold annotation, but with a different tag</li>
     * <li>{@code ERROR4}: overlapping a gold annotation, same tag</li>
     * <li>{@code ERROR5}: overlapping a gold annotation, different tag</li>
     * </ul>
     * Tagged annotations whose tag equals "o" (ignoring case) are skipped entirely. The early exit of the inner scan
     * relies on the gold standard being ordered by position; the file based overloads sort both lists beforehand.
     *
     * @param list the gold standard annotations
     * @param list2 the annotations produced by the recognizer
     * @param set values of gold standard annotations to skip while matching; a listed annotation is still looked at
     *            as long as the tagged annotation starts before its end and has not overlapped anything yet
     * @return the collected evaluation result
     */
    public static EvaluationResult evaluate(List<? extends Annotation> list, List<? extends Annotation> list2, Set<String> set) {
        EvaluationResult evaluationResult = new EvaluationResult(list);
        // Gold annotations that were hit (exactly or by overlap) by at least one tagged annotation.
        Set<Annotation> matchedGold = new HashSet<Annotation>();
        final int goldCount = list.size();

        for (Annotation tagged : list2) {
            if (tagged.getTag().equalsIgnoreCase("o")) {
                continue;
            }

            boolean overlapped = false;
            boolean exactMatch = false;
            int position = 0;

            // A bounded iteration guarantees termination. The previous "while (true)" around "if (it.hasNext())"
            // never exited once the iterator was exhausted (empty gold standard, or a last gold annotation that was
            // congruent, overlapping or ignored).
            for (Annotation gold : list) {
                position++;

                boolean ignored = set.contains(gold.getValue())
                        && !(tagged.getStartPosition() < gold.getEndPosition() && !overlapped);
                if (ignored) {
                    continue;
                }

                if (tagged.congruent(gold)) {
                    matchedGold.add(gold);
                    if (tagged.sameTag(gold)) {
                        evaluationResult.add(EvaluationResult.ResultType.CORRECT, gold, tagged);
                    } else {
                        evaluationResult.add(EvaluationResult.ResultType.ERROR3, gold, tagged);
                    }
                    // An exact span match is final: stop scanning and make sure it is not reported as ERROR1 below.
                    exactMatch = true;
                    break;
                }

                if (tagged.overlaps(gold)) {
                    matchedGold.add(gold);
                    if (tagged.sameTag(gold)) {
                        evaluationResult.add(EvaluationResult.ResultType.ERROR4, gold, tagged);
                    } else {
                        evaluationResult.add(EvaluationResult.ResultType.ERROR5, gold, tagged);
                    }
                    // Keep scanning: later gold annotations may overlap the same tagged annotation as well.
                    overlapped = true;
                    continue;
                }

                // Neither congruent nor overlapping: stop once the gold annotation ends after the tagged one starts
                // or the end of the gold standard is reached.
                if (tagged.getStartPosition() < gold.getEndPosition() || position == goldCount) {
                    break;
                }
            }

            if (!exactMatch && !overlapped) {
                evaluationResult.add(EvaluationResult.ResultType.ERROR1, null, tagged);
            }
        }

        for (Annotation gold : list) {
            if (!matchedGold.contains(gold)) {
                evaluationResult.add(EvaluationResult.ResultType.ERROR2, gold, null);
            }
        }
        return evaluationResult;
    }

    /**
     * Runs the recognizer on the document content and attaches the result to the document as a list feature named
     * {@link #PROVIDED_FEATURE}. Only the start and end position of each annotation are passed to the
     * {@link PositionAnnotationFactory}.
     *
     * @param textDocument the document to process
     * @throws DocumentUnprocessableException declared by the overridden method; not thrown directly in this body
     */
    @Override // ws.palladian.extraction.feature.TextDocumentPipelineProcessor
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        List<? extends Annotation> annotations = getAnnotations((String) textDocument.getContent());
        PositionAnnotationFactory positionAnnotationFactory = new PositionAnnotationFactory(textDocument);
        ListFeature listFeature = new ListFeature(PROVIDED_FEATURE);
        for (Annotation annotation : annotations) {
            // NOTE(review): the (ListFeature) cast looks like a decompiler artifact of an erased type parameter;
            // confirm against ListFeature.add and the factory's return type before relying on or removing it.
            listFeature.add((ListFeature) positionAnnotationFactory.create(annotation.getStartPosition(), annotation.getEndPosition()));
        }
        textDocument.add(listFeature);
    }

    /**
     * @return the name of this recognizer
     */
    public abstract String getName();

    /**
     * Sets the format used by {@link #tagText(String, List)}.
     *
     * @param taggingFormat the tagging format to use from now on
     */
    public void setTaggingFormat(TaggingFormat taggingFormat) {
        this.taggingFormat = taggingFormat;
    }

    /**
     * @return the format currently used by {@link #tagText(String, List)}
     */
    public TaggingFormat getTaggingFormat() {
        return this.taggingFormat;
    }
}
