package ws.palladian.extraction.keyphrase.features;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.jdesktop.swingx.JXLabel;
import ws.palladian.extraction.feature.DuplicateTokenConsolidator;
import ws.palladian.extraction.feature.StemmerAnnotator;
import ws.palladian.extraction.feature.TextDocumentPipelineProcessor;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.CountMap;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.FeatureVector;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.NominalFeature;
import ws.palladian.processing.features.NumericFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/keyphrase/features/AdditionalFeatureExtractor.class */
/**
 * Pipeline processor that adds surface-form features to every token annotation of a document.
 *
 * <p>For each annotation found under {@link BaseTokenizer#PROVIDED_FEATURE} the unstemmed value
 * ({@link StemmerAnnotator#UNSTEM}) is inspected and the following features are appended to the
 * annotation's feature vector: {@link #STARTS_UPPERCASE_PERCENTAGE}, {@link #COMPLETE_UPPERCASE},
 * {@link #NUMBER_PERCENTAGE}, {@link #IS_NUMBER}, {@link #PUNCTUATION_PERCENTAGE},
 * {@link #UNIQUE_CHARACTER_PERCENTAGE} and {@link #CASE_SIGNATURE}.</p>
 *
 * <p>The case-related features are computed over the annotation together with the annotations
 * returned by {@link DuplicateTokenConsolidator#getDuplicateAnnotations(PositionAnnotation)}.</p>
 */
public final class AdditionalFeatureExtractor extends TextDocumentPipelineProcessor {
    /** Feature name: 1.0 if more than half of the surface forms start with an uppercase letter, else 0.0. */
    public static final String STARTS_UPPERCASE_PERCENTAGE = "startsUppercase";
    /** Feature name: 1.0 if more than half of the surface forms are completely uppercase, else 0.0. */
    public static final String COMPLETE_UPPERCASE = "completelyUppercase";
    /** Feature name: fraction of characters of the unstemmed value that are digits. */
    public static final String NUMBER_PERCENTAGE = "containsNumbers";
    /** Feature name: string form of the boolean returned by {@code StringHelper.isNumber}. */
    public static final String IS_NUMBER = "isNumber";
    /** Feature name: fraction of characters of the unstemmed value that are punctuation. */
    public static final String PUNCTUATION_PERCENTAGE = "containsPunctuation";
    /** Feature name: ratio of distinct characters to total characters of the unstemmed value. */
    public static final String UNIQUE_CHARACTER_PERCENTAGE = "uniqueCharacterPercentage";
    /** Feature name: most frequent case signature among the surface forms. */
    public static final String CASE_SIGNATURE = "caseSignature";

    /** Share of surface forms that must be exceeded for a binary case feature to become 1.0. */
    private static final double MAJORITY_THRESHOLD = 0.5d;

    /**
     * Computes the additional features for every token annotation of the given document and adds
     * them to the annotation's feature vector.
     *
     * @param textDocument the document whose token annotations are enriched
     * @throws DocumentUnprocessableException if the token list is missing, or if an annotation
     *         (or one of its duplicates) lacks the unstemmed value
     */
    @Override
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        List<?> annotations = (List<?>) textDocument.get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
        if (annotations == null) {
            throw new DocumentUnprocessableException(
                    "The necessary feature \"" + BaseTokenizer.PROVIDED_FEATURE + "\" is missing");
        }
        for (Object element : annotations) {
            PositionAnnotation annotation = (PositionAnnotation) element;

            // Surface forms of the annotation itself (index 0) followed by those of its duplicates.
            // Collected once and validated up front, so no feature is added to a broken annotation.
            List<String> surfaceForms = collectUnstemmedValues(annotation);
            String unstemmed = surfaceForms.get(0);

            double startsUppercase = getStartsUppercase(surfaceForms);
            double completeUppercase = getCompleteUppercase(surfaceForms);
            double digitPercentage = getDigitPercentage(unstemmed);
            String caseSignature = getCaseSignature(surfaceForms);
            String isNumber = String.valueOf(StringHelper.isNumber(unstemmed));
            double punctuationPercentage = getPunctuationPercentage(unstemmed);
            double uniqueCharacterPercentage = getUniqueCharacterPercentage(unstemmed);

            FeatureVector featureVector = annotation.getFeatureVector();
            featureVector.add(new NumericFeature(STARTS_UPPERCASE_PERCENTAGE, Double.valueOf(startsUppercase)));
            featureVector.add(new NumericFeature(COMPLETE_UPPERCASE, Double.valueOf(completeUppercase)));
            featureVector.add(new NumericFeature(NUMBER_PERCENTAGE, Double.valueOf(digitPercentage)));
            featureVector.add(new NominalFeature(IS_NUMBER, isNumber));
            // The punctuation ratio gets its own feature name; it used to be filed under
            // UNIQUE_CHARACTER_PERCENTAGE, which left "containsPunctuation" unset.
            featureVector.add(new NumericFeature(PUNCTUATION_PERCENTAGE, Double.valueOf(punctuationPercentage)));
            featureVector.add(new NumericFeature(UNIQUE_CHARACTER_PERCENTAGE, Double.valueOf(uniqueCharacterPercentage)));
            featureVector.add(new NominalFeature(CASE_SIGNATURE, caseSignature));
        }
    }

    /**
     * Returns the ratio of distinct characters to the total number of characters.
     *
     * @param str the string to inspect, must not be {@code null}
     * @return 0.0 for an empty string or a string consisting of a single distinct character,
     *         otherwise {@code distinct / length}
     */
    static double getUniqueCharacterPercentage(String str) {
        if (str.isEmpty()) {
            return 0.0d;
        }
        CountMap<Character> characters = CountMap.create();
        for (int i = 0; i < str.length(); i++) {
            characters.add(Character.valueOf(str.charAt(i)));
        }
        int distinct = characters.uniqueSize();
        if (distinct == 1) {
            return 0.0d;
        }
        // Cast before dividing: integer division would truncate the ratio to 0 or 1.
        return (double) distinct / str.length();
    }

    /**
     * Returns the fraction of characters for which {@code StringHelper.isPunctuation} is true.
     *
     * @param str the string to inspect, must not be {@code null}
     * @return a value in [0, 1]; 0.0 for an empty string
     */
    static double getPunctuationPercentage(String str) {
        if (str.isEmpty()) {
            return 0.0d; // avoid 0/0 = NaN
        }
        int punctuationCount = 0;
        for (int i = 0; i < str.length(); i++) {
            if (StringHelper.isPunctuation(str.charAt(i))) {
                punctuationCount++;
            }
        }
        return (double) punctuationCount / str.length();
    }

    /**
     * Returns the fraction of characters that are digits according to {@link Character#isDigit(char)}.
     *
     * @param str the string to inspect, must not be {@code null}
     * @return a value in [0, 1]; 0.0 for an empty string
     */
    static double getDigitPercentage(String str) {
        if (str.isEmpty()) {
            return 0.0d; // avoid 0/0 = NaN
        }
        int digitCount = 0;
        for (int i = 0; i < str.length(); i++) {
            if (Character.isDigit(str.charAt(i))) {
                digitCount++;
            }
        }
        return (double) digitCount / str.length();
    }

    /**
     * Reads the unstemmed value of an annotation.
     *
     * @throws DocumentUnprocessableException if the feature or its value is absent
     */
    private static String requireUnstemmedValue(PositionAnnotation annotation) throws DocumentUnprocessableException {
        NominalFeature unstemFeature = (NominalFeature) annotation.getFeatureVector().get(NominalFeature.class, StemmerAnnotator.UNSTEM);
        // Check the feature itself before dereferencing it, so a missing feature is reported
        // as an unprocessable document rather than a NullPointerException.
        String value = unstemFeature == null ? null : unstemFeature.getValue();
        if (value == null) {
            throw new DocumentUnprocessableException("The necessary feature \"ws.palladian.features.unstem\" is missing for Annotation \"" + annotation.getValue() + "\"");
        }
        return value;
    }

    /**
     * Collects the unstemmed values of the annotation (first element) and of all its duplicates.
     *
     * @throws DocumentUnprocessableException if any of the annotations lacks the unstemmed value
     */
    private static List<String> collectUnstemmedValues(PositionAnnotation annotation) throws DocumentUnprocessableException {
        List<String> values = new ArrayList<String>();
        values.add(requireUnstemmedValue(annotation));
        for (Object duplicate : DuplicateTokenConsolidator.getDuplicateAnnotations(annotation)) {
            values.add(requireUnstemmedValue((PositionAnnotation) duplicate));
        }
        return values;
    }

    /** Returns 1.0 if strictly more than half of {@code total} matched, otherwise 0.0. */
    private static double majorityIndicator(int matches, int total) {
        return (double) matches / total > MAJORITY_THRESHOLD ? 1.0d : 0.0d;
    }

    /** Returns 1.0 if more than half of the surface forms are all-uppercase, otherwise 0.0. */
    private static double getCompleteUppercase(List<String> surfaceForms) {
        int matches = 0;
        for (String surfaceForm : surfaceForms) {
            if (StringUtils.isAllUpperCase(surfaceForm)) {
                matches++;
            }
        }
        return majorityIndicator(matches, surfaceForms.size());
    }

    /** Returns the case signature that occurs most often among the surface forms. */
    private static String getCaseSignature(List<String> surfaceForms) {
        CountMap<String> signatures = CountMap.create();
        for (String surfaceForm : surfaceForms) {
            signatures.add(StringHelper.getCaseSignature(surfaceForm));
        }
        return signatures.getHighest();
    }

    /** Returns 1.0 if more than half of the surface forms start with an uppercase letter, otherwise 0.0. */
    private static double getStartsUppercase(List<String> surfaceForms) {
        int matches = 0;
        for (String surfaceForm : surfaceForms) {
            if (StringHelper.startsUppercase(surfaceForm)) {
                matches++;
            }
        }
        return majorityIndicator(matches, surfaceForms.size());
    }
}
