package ws.palladian.extraction.feature;

import com.aliasi.util.Strings;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import ws.palladian.extraction.token.BaseTokenizer;
import ws.palladian.helper.collection.CountMap;
import ws.palladian.processing.DocumentUnprocessableException;
import ws.palladian.processing.TextDocument;
import ws.palladian.processing.features.FeatureVector;
import ws.palladian.processing.features.ListFeature;
import ws.palladian.processing.features.NumericFeature;
import ws.palladian.processing.features.PositionAnnotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/TokenMetricsCalculator.class */
/**
 * Pipeline processor that attaches occurrence metrics to every token annotation of a
 * {@link TextDocument}.
 *
 * <p>Tokens are read from the document feature registered under
 * {@link BaseTokenizer#PROVIDED_FEATURE} and compared case-insensitively (by their lower-cased
 * value). A token's position is its zero-based index in that list. For each token annotation the
 * numeric features {@link #FIRST}, {@link #LAST}, {@link #COUNT}, {@link #FREQUENCY},
 * {@link #SPREAD}, {@link #CHAR_LENGTH} and {@link #WORD_LENGTH} are added to the annotation's
 * own feature vector.
 */
public final class TokenMetricsCalculator extends TextDocumentPipelineProcessor {
    /** Position of the token value's first occurrence, divided by the index of the last token. */
    public static final String FIRST = "ws.palladian.features.tokens.first";
    /** Position of the token value's last occurrence, divided by the index of the last token. */
    public static final String LAST = "ws.palladian.features.tokens.last";
    /** Absolute number of occurrences of the token value in the document. */
    public static final String COUNT = "ws.palladian.features.tokens.count";
    /** Occurrence count divided by the count of the most frequent token value in the document. */
    public static final String FREQUENCY = "ws.palladian.features.tokens.frequency";
    /** Difference between the {@link #LAST} and {@link #FIRST} values. */
    public static final String SPREAD = "ws.palladian.features.tokens.spread";
    /** Number of characters of the lower-cased token value. */
    public static final String CHAR_LENGTH = "ws.palladian.features.tokens.length.char";
    /** Number of parts obtained when splitting the lower-cased token value at single spaces. */
    public static final String WORD_LENGTH = "ws.palladian.features.tokens.length.word";

    /**
     * Computes the token metrics and adds them as {@link NumericFeature}s to each token
     * annotation's feature vector.
     *
     * @param textDocument the document whose token annotations are to be enriched
     * @throws DocumentUnprocessableException if the document carries no token feature under
     *         {@link BaseTokenizer#PROVIDED_FEATURE}
     */
    @Override // ws.palladian.extraction.feature.TextDocumentPipelineProcessor
    public void processDocument(TextDocument textDocument) throws DocumentUnprocessableException {
        @SuppressWarnings({"unchecked", "rawtypes"})
        List<PositionAnnotation> tokens = (List) textDocument.getFeatureVector().get(ListFeature.class, BaseTokenizer.PROVIDED_FEATURE);
        if (tokens == null) {
            throw new DocumentUnprocessableException("The required token feature \"" + BaseTokenizer.PROVIDED_FEATURE
                    + "\" is missing, the document needs to be tokenized first.");
        }

        CountMap<String> occurrences = CountMap.create();
        Map<String, Integer> firstPositions = new HashMap<String, Integer>();
        Map<String, Integer> lastPositions = new HashMap<String, Integer>();

        // First pass: count occurrences and record first/last position per token value.
        // Positions grow monotonically, so the first position is only stored once while the
        // last position is simply overwritten on every occurrence.
        int position = 0;
        for (PositionAnnotation token : tokens) {
            String value = token.getValue().toLowerCase();
            occurrences.add(value);
            if (!firstPositions.containsKey(value)) {
                firstPositions.put(value, Integer.valueOf(position));
            }
            lastPositions.put(value, Integer.valueOf(position));
            position++;
        }
        // Index of the final token; stays 0 for an empty token list.
        int lastPosition = Math.max(0, position - 1);

        // Highest occurrence count of any token value; at least 1 so it is a safe divisor.
        int maxCount = 1;
        for (String value : occurrences.uniqueItems()) {
            maxCount = Math.max(maxCount, occurrences.getCount(value));
        }

        // Second pass: derive the metrics and attach them to every token annotation.
        for (PositionAnnotation token : tokens) {
            String value = token.getValue().toLowerCase();
            double first = relativePosition(firstPositions.get(value).intValue(), lastPosition);
            double last = relativePosition(lastPositions.get(value).intValue(), lastPosition);
            double count = occurrences.getCount(value);
            double frequency = count / maxCount;
            double spread = last - first;
            double charLength = value.length();
            double wordLength = value.split(Strings.SINGLE_SPACE_STRING).length;

            FeatureVector featureVector = token.getFeatureVector();
            addMetric(featureVector, FIRST, first);
            addMetric(featureVector, LAST, last);
            addMetric(featureVector, COUNT, count);
            addMetric(featureVector, FREQUENCY, frequency);
            addMetric(featureVector, SPREAD, spread);
            addMetric(featureVector, CHAR_LENGTH, charLength);
            addMetric(featureVector, WORD_LENGTH, wordLength);
        }
    }

    /**
     * Scales a token position by the index of the last token.
     *
     * @param position zero-based token position
     * @param lastPosition zero-based index of the last token in the document
     * @return {@code position / lastPosition} as a floating-point fraction, or {@code 0.0} when
     *         {@code lastPosition} is 0 (single-token document, where the only position is 0)
     */
    private static double relativePosition(int position, int lastPosition) {
        if (lastPosition == 0) {
            return 0.0;
        }
        // Cast before dividing: int / int would truncate the fraction to 0 or 1.
        return (double) position / lastPosition;
    }

    /** Adds a {@link NumericFeature} with the given name and value to the feature vector. */
    private static void addMetric(FeatureVector featureVector, String name, double value) {
        featureVector.add(new NumericFeature(name, Double.valueOf(value)));
    }
}
