package de.julielab.jnet.tagger;

import com.aliasi.util.Strings;
import com.uea.stemmer.UEALite;
import de.julielab.jnet.utils.Utils;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.LabelAlphabet;
import edu.umass.cs.mallet.base.types.LabelSequence;
import edu.umass.cs.mallet.base.types.Token;
import edu.umass.cs.mallet.base.types.TokenSequence;
import java.util.ArrayList;
import java.util.Properties;
import java.util.regex.Pattern;

/* loaded from: input_file:lib/palladian.jar:de/julielab/jnet/tagger/BasePipe.class */
class BasePipe extends Pipe {
    private static final long serialVersionUID = 1;
    UEALite stemmer;
    Properties featureConfig;
    boolean doStemming;

    public BasePipe(Properties properties) {
        super((Class) null, LabelAlphabet.class);
        this.stemmer = new UEALite();
        this.doStemming = properties.getProperty("stemming_enabled").equals("true");
        this.featureConfig = properties;
    }

    public Instance pipe(Instance instance) {
        ArrayList<Unit> units = ((Sentence) instance.getData()).getUnits();
        StringBuffer stringBuffer = new StringBuffer();
        TokenSequence tokenSequence = new TokenSequence(units.size());
        LabelSequence labelSequence = new LabelSequence(getTargetAlphabet(), units.size());
        String[] trueMetas = Utils.getTrueMetas(this.featureConfig);
        for (int i = 0; i < units.size(); i++) {
            try {
                String rep = units.get(i).getRep();
                if (this.doStemming) {
                    rep = this.stemmer.stem(rep).getWord();
                }
                Token token = new Token(rep);
                token.setFeatureValue("W=" + rep, 1.0d);
                for (String str : trueMetas) {
                    String property = this.featureConfig.getProperty(str + "_feat_unit");
                    String metaInfo = units.get(i).getMetaInfo(property);
                    if (metaInfo != null) {
                        token.setFeatureValue(property + "=" + metaInfo, 1.0d);
                    }
                }
                token.setText(rep);
                if (this.featureConfig.getProperty("feat_wc_enabled").equals("true")) {
                    token.setFeatureValue("WC=" + rep.replaceAll("[A-Z]", "A").replaceAll("[a-z]", "a").replaceAll("[0-9]", "0").replaceAll("[^A-Za-z0-9]", "x"), 1.0d);
                }
                if (this.featureConfig.getProperty("feat_bwc_enabled").equals("true")) {
                    token.setFeatureValue("BWC=" + rep.replaceAll("[A-Z]+", "A").replaceAll("[a-z]+", "a").replaceAll("[0-9]+", "0").replaceAll("[^A-Za-z0-9]+", "x"), 1.0d);
                }
                stringBuffer.append(token.getText());
                stringBuffer.append(Strings.SINGLE_SPACE_STRING);
                tokenSequence.add(token);
                labelSequence.add(units.get(i).getLabel());
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
                return null;
            }
        }
        if (labelSequence.size() != tokenSequence.size()) {
            throw new JNETException("Label not found... check your label definition file.");
        }
        instance.setData(tokenSequence);
        instance.setTarget(labelSequence);
        instance.setSource(stringBuffer);
        return instance;
    }
}
