package ws.palladian.semantics;

import com.aliasi.util.Strings;
import com.aliasi.xml.XHtmlWriter;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.h2.message.Trace;
import org.jdesktop.swingx.ws.yahoo.rss.YahooNews;
import ws.palladian.classification.utils.ClassificationUtils;
import ws.palladian.extraction.feature.StemmerAnnotator;
import ws.palladian.extraction.pos.BasePosTagger;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.processing.features.Annotation;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/semantics/WordTransformer.class */
public class WordTransformer {
    private static final Map<String, String> IRREGULAR_NOUNS = new HashMap();
    private static final Map<String, EnglishVerb> IRREGULAR_VERBS = new HashMap();
    private static final StemmerAnnotator GERMAN_STEMMER = new StemmerAnnotator(Language.GERMAN);
    private static final StemmerAnnotator ENGLISH_STEMMER = new StemmerAnnotator(Language.ENGLISH);

    private static Map<String, String> getIrregularNouns() {
        return IRREGULAR_NOUNS;
    }

    public static String wordToSingular(String str, Language language) {
        if (language.equals(Language.ENGLISH)) {
            return wordToSingularEnglish(str);
        }
        if (language.equals(Language.GERMAN)) {
            throw new IllegalStateException("nix gut (needs to be restructured because of model paths).");
        }
        throw new IllegalArgumentException("Language must be 'en' or 'de'.");
    }

    public static String wordToSingularEnglish(String str) {
        if (str == null) {
            return "";
        }
        if (getIrregularNouns().containsValue(str)) {
            String str2 = (String) CollectionHelper.getKeyByValue(getIrregularNouns(), str);
            if (StringHelper.startsUppercase(str)) {
                str2 = StringHelper.upperCaseFirstLetter(str2);
            }
            return str2;
        }
        if (str.length() < 4) {
            return str;
        }
        if (str.toLowerCase().endsWith("ices")) {
            return str.substring(0, str.length() - 4) + "ix";
        }
        if (str.toLowerCase().endsWith("ies")) {
            return str.substring(0, str.length() - 3) + "y";
        }
        if (str.toLowerCase().endsWith("ves")) {
            char charAt = str.substring(str.length() - 3, str.length() - 2).charAt(0);
            String str3 = str.substring(0, str.length() - 3) + "f";
            if (!StringHelper.isVowel(charAt) && StringHelper.isVowel(str3.substring(str3.length() - 2, str3.length() - 1).charAt(0))) {
                str3 = str3 + "e";
            }
            return str3;
        }
        if (str.toLowerCase().endsWith("es") && str.length() >= 5) {
            String substring = str.substring(str.length() - 4, str.length() - 2);
            String substring2 = substring.substring(1);
            if (substring.equalsIgnoreCase("ss") || substring.equalsIgnoreCase("ch") || substring.equalsIgnoreCase("sh") || substring2.equalsIgnoreCase("x") || StringHelper.isVowel(substring2.charAt(0))) {
                return str.substring(0, str.length() - 2);
            }
        }
        return str.toLowerCase().endsWith("s") ? str.substring(0, str.length() - 1) : str;
    }

    public static String wordToPlural(String str, Language language) {
        if (language.equals(Language.ENGLISH)) {
            return wordToPluralEnglish(str);
        }
        if (language.equals(Language.GERMAN)) {
            throw new IllegalStateException("nix gut (needs to be restructured because of model paths).");
        }
        throw new IllegalArgumentException("Language must be 'en' or 'de'.");
    }

    public static String wordToPluralEnglish(String str) {
        if (str == null) {
            return "";
        }
        String str2 = "";
        String[] split = str.split(Strings.SINGLE_SPACE_STRING);
        if (split.length > 1) {
            str = split[split.length - 1];
            if (split.length > 1) {
                for (int i = 0; i < split.length - 1; i++) {
                    str2 = str2 + split[i] + Strings.SINGLE_SPACE_STRING;
                }
            }
        }
        if (getIrregularNouns().containsKey(str)) {
            String str3 = getIrregularNouns().get(str);
            if (StringHelper.startsUppercase(str)) {
                str3 = StringHelper.upperCaseFirstLetter(str3);
            }
            return str2 + str3;
        }
        if (str.length() < 3) {
            return str2 + str;
        }
        String substring = str.substring(str.length() - 1, str.length());
        String str4 = str.substring(str.length() - 2, str.length() - 1) + substring;
        return (str4.equalsIgnoreCase("ay") || str4.equalsIgnoreCase("ey") || str4.equalsIgnoreCase("iy") || str4.equalsIgnoreCase("oy") || str4.equalsIgnoreCase("uy")) ? str2 + str + "s" : substring.equalsIgnoreCase("y") ? str2 + str.substring(0, str.length() - 1) + "ies" : str4.equalsIgnoreCase("is") ? str2 + str.substring(0, str.length() - 2) + "es" : (substring.equalsIgnoreCase("s") || substring.equalsIgnoreCase("z") || substring.equalsIgnoreCase("x") || str4.equalsIgnoreCase("ch") || str4.equalsIgnoreCase("sh")) ? str2 + str + "es" : str2 + str + "s";
    }

    public static String stemGermanWords(String str) {
        return stemWords(str, Language.GERMAN);
    }

    public static String stemEnglishWords(String str) {
        return stemWords(str, Language.ENGLISH);
    }

    public static String stemWords(String str, Language language) {
        StringBuilder sb = new StringBuilder();
        String[] split = str.split(Strings.SINGLE_SPACE_STRING);
        for (int i = 0; i < split.length; i++) {
            if (language == Language.GERMAN) {
                sb.append(stemGermanWord(split[i]));
            } else if (language == Language.ENGLISH) {
                sb.append(stemEnglishWord(split[i]));
            }
            sb.append(Strings.SINGLE_SPACE_STRING);
        }
        return sb.toString().trim();
    }

    public static String stemGermanWord(String str) {
        return GERMAN_STEMMER.stem(str);
    }

    public static String stemEnglishWord(String str) {
        return ENGLISH_STEMMER.stem(str);
    }

    public static String getThirdPersonSingular(String str) {
        String lowerCase = str.toLowerCase();
        if (lowerCase.equals("be")) {
            return "is";
        }
        if (lowerCase.equals("was")) {
            return "was";
        }
        if (lowerCase.equals("been")) {
            return "been";
        }
        if (lowerCase.equals("have")) {
            return "has";
        }
        if (new HashSet(Arrays.asList("can", "could", "will", "would", "may", "might", "shall", "should", "must")).contains(lowerCase)) {
            return lowerCase;
        }
        EnglishVerb englishVerb = IRREGULAR_VERBS.get(stemEnglishWord(lowerCase));
        if (englishVerb != null) {
            if (englishVerb.getSimplePast().equals(lowerCase) || englishVerb.getPastParticiple().equals(lowerCase)) {
                return lowerCase;
            }
            lowerCase = englishVerb.getPresent();
        }
        if (englishVerb == null && lowerCase.endsWith("ed")) {
            return lowerCase;
        }
        char charAt = lowerCase.charAt(lowerCase.length() - 1);
        char charAt2 = lowerCase.charAt(lowerCase.length() - 2);
        return (lowerCase.endsWith("ch") || lowerCase.endsWith("sh") || lowerCase.endsWith("x") || lowerCase.endsWith("o")) ? lowerCase + "es" : (StringHelper.isVowel(charAt2) || !(lowerCase.endsWith("s") || lowerCase.endsWith("z"))) ? (StringHelper.isVowel(charAt2) && (lowerCase.endsWith("s") || lowerCase.endsWith("z"))) ? lowerCase + charAt + "es" : (StringHelper.isVowel(charAt2) || !lowerCase.endsWith("y")) ? lowerCase + "s" : lowerCase.replaceAll("y$", "ies") : lowerCase + "es";
    }

    public static String getSimplePresent(String str) {
        EnglishVerb englishVerb = IRREGULAR_VERBS.get(stemEnglishWord(str));
        return englishVerb != null ? englishVerb.getPresent() : str.endsWith("ed") ? str.replaceAll("ed$", "") : str;
    }

    public static String getSimplePast(String str) {
        EnglishVerb englishVerb = IRREGULAR_VERBS.get(stemEnglishWord(str));
        return englishVerb != null ? englishVerb.getSimplePast() : getRegularVerbPast(str);
    }

    private static String getRegularVerbPast(String str) {
        if (str.isEmpty()) {
            return str;
        }
        String lowerCase = str.toLowerCase();
        return lowerCase.endsWith("ed") ? lowerCase : lowerCase.endsWith("e") ? lowerCase + "d" : lowerCase.endsWith("y") ? lowerCase.replaceAll("y$", "ied") : (lowerCase.contains("qui") || lowerCase.contains("qua") || lowerCase.contains("quo") || lowerCase.contains("quu")) ? lowerCase.replaceAll("(.)$", "$1$1ed") : lowerCase + "ed";
    }

    public static String getPastParticiple(String str) {
        String stemEnglishWord = stemEnglishWord(str);
        EnglishVerb englishVerb = IRREGULAR_VERBS.get(stemEnglishWord);
        return englishVerb != null ? englishVerb.getPastParticiple() : getRegularVerbPast(stemEnglishWord);
    }

    public static EnglishTense getTense(String str, BasePosTagger basePosTagger) {
        return getTense(str, basePosTagger.getAnnotations(str));
    }

    public static EnglishTense getTense(String str, List<Annotation> list) {
        String lowerCase = str.toLowerCase();
        if (StringHelper.containsWord("do", lowerCase) || StringHelper.containsWord("don't", lowerCase) || StringHelper.containsWord("does", lowerCase) || StringHelper.containsWord("doesn't", lowerCase)) {
            return EnglishTense.SIMPLE_PRESENT;
        }
        if (StringHelper.containsWord("did", lowerCase) || StringHelper.containsWord("didn't", lowerCase)) {
            return EnglishTense.SIMPLE_PAST;
        }
        boolean z = StringHelper.containsWord("is", lowerCase) || StringHelper.containsWord("are", lowerCase);
        boolean z2 = StringHelper.containsWord("was", lowerCase) || StringHelper.containsWord("were", lowerCase);
        HashSet newHashSet = CollectionHelper.newHashSet();
        Iterator<Annotation> it = list.iterator();
        while (it.hasNext()) {
            newHashSet.add(it.next().getTag());
        }
        return (!newHashSet.contains("VBD") || z) ? (newHashSet.contains("HVD") && (newHashSet.contains("VBN") || newHashSet.contains("HVN"))) ? EnglishTense.PAST_PERFECT : (newHashSet.contains("HV") && (newHashSet.contains("VBN") || newHashSet.contains("HVN"))) ? EnglishTense.PRESENT_PERFECT : (!newHashSet.contains("VBN") || z) ? z2 ? EnglishTense.SIMPLE_PAST : EnglishTense.SIMPLE_PRESENT : EnglishTense.PRESENT_PERFECT : EnglishTense.SIMPLE_PAST;
    }

    public static void main(String[] strArr) {
        System.out.println(getThirdPersonSingular("cross"));
        System.exit(0);
        StopWatch stopWatch = new StopWatch();
        System.out.println(wordToSingular("women", Language.ENGLISH));
        System.out.println(wordToSingular("services", Language.ENGLISH));
        System.out.println(wordToSingular("series", Language.ENGLISH));
        System.out.println(wordToSingular("species", Language.ENGLISH));
        System.out.println(wordToSingular("automata", Language.ENGLISH));
        System.out.println(wordToSingular("archives", Language.ENGLISH));
        System.out.println(wordToSingular("Kleider", Language.GERMAN));
        System.out.println(wordToSingular("Getränke", Language.GERMAN));
        System.out.println(wordToSingular("Hüte", Language.GERMAN));
        System.out.println(wordToSingular("Häuser", Language.GERMAN));
        System.out.println(wordToSingular("Autos", Language.GERMAN));
        System.out.println(wordToSingular("Oktober", Language.GERMAN));
        System.out.println(wordToPlural("Kleid", Language.GERMAN));
        System.out.println(wordToPlural("Getränk", Language.GERMAN));
        System.out.println(wordToPlural("Hut", Language.GERMAN));
        System.out.println(wordToPlural("Haus", Language.GERMAN));
        System.out.println(wordToPlural("Auto", Language.GERMAN));
        System.out.println(wordToPlural("Oktober", Language.GERMAN));
        System.out.println(stopWatch.getElapsedTimeString());
    }

    static {
        InputStream inputStream = null;
        try {
            inputStream = WordTransformer.class.getResourceAsStream("/irregularEnglishVerbs.csv");
            Iterator<String> it = FileHelper.readFileToArray(inputStream).iterator();
            while (it.hasNext()) {
                String[] split = it.next().split(ClassificationUtils.DEFAULT_SEPARATOR);
                EnglishVerb englishVerb = new EnglishVerb(split[0], split[1], split[2]);
                IRREGULAR_VERBS.put(split[0], englishVerb);
                IRREGULAR_VERBS.put(split[1], englishVerb);
                IRREGULAR_VERBS.put(split[2], englishVerb);
            }
            FileHelper.close(inputStream);
            IRREGULAR_NOUNS.put("addendum", "addenda");
            IRREGULAR_NOUNS.put("alga", "algae");
            IRREGULAR_NOUNS.put("alumna", "alumnae");
            IRREGULAR_NOUNS.put("alumnus", "alumni");
            IRREGULAR_NOUNS.put("analysis", "analyses");
            IRREGULAR_NOUNS.put("antennas", "antenna");
            IRREGULAR_NOUNS.put("apparatus", "apparatuses");
            IRREGULAR_NOUNS.put("appendix", "appendices");
            IRREGULAR_NOUNS.put(XHtmlWriter.ARCHIVE, "archives");
            IRREGULAR_NOUNS.put("automaton", "automata");
            IRREGULAR_NOUNS.put(XHtmlWriter.AXIS, "axes");
            IRREGULAR_NOUNS.put("bacillus", "bacilli");
            IRREGULAR_NOUNS.put("bacterium", "bacteria");
            IRREGULAR_NOUNS.put("basis", "bases");
            IRREGULAR_NOUNS.put("beau", "beaux");
            IRREGULAR_NOUNS.put("bison", "bison");
            IRREGULAR_NOUNS.put("calf", "calves");
            IRREGULAR_NOUNS.put("child", "children");
            IRREGULAR_NOUNS.put("corps", "corps");
            IRREGULAR_NOUNS.put("crisis", "crises");
            IRREGULAR_NOUNS.put("criterion", "criteria");
            IRREGULAR_NOUNS.put("curriculum", "curricula");
            IRREGULAR_NOUNS.put("datum", XHtmlWriter.DATA);
            IRREGULAR_NOUNS.put("deer", "deer");
            IRREGULAR_NOUNS.put("die", "dice");
            IRREGULAR_NOUNS.put("diagnosis", "diagnoses");
            IRREGULAR_NOUNS.put("echo", "echoes");
            IRREGULAR_NOUNS.put("elf", "elves");
            IRREGULAR_NOUNS.put("ellipsis", "ellipses");
            IRREGULAR_NOUNS.put("embargo", "embargoes");
            IRREGULAR_NOUNS.put("emphasis", "emphases");
            IRREGULAR_NOUNS.put("erratum", "errata");
            IRREGULAR_NOUNS.put("fireman", "firemen");
            IRREGULAR_NOUNS.put("fish", "fish");
            IRREGULAR_NOUNS.put("foot", "feet");
            IRREGULAR_NOUNS.put("fungus", "fungi");
            IRREGULAR_NOUNS.put("genus", "genera");
            IRREGULAR_NOUNS.put("goose", "geese");
            IRREGULAR_NOUNS.put("half", "halves");
            IRREGULAR_NOUNS.put("hero", "heroes");
            IRREGULAR_NOUNS.put("hippopotamus", "hippopotami");
            IRREGULAR_NOUNS.put("hypothesis", "hypotheses");
            IRREGULAR_NOUNS.put(Trace.INDEX, "indices");
            IRREGULAR_NOUNS.put("information", "information");
            IRREGULAR_NOUNS.put("knife", "knives");
            IRREGULAR_NOUNS.put("leaf", "leaves");
            IRREGULAR_NOUNS.put("life", "lives");
            IRREGULAR_NOUNS.put("loaf", "loaves");
            IRREGULAR_NOUNS.put("louse", "lice");
            IRREGULAR_NOUNS.put("man", "men");
            IRREGULAR_NOUNS.put("matrix", "matrices");
            IRREGULAR_NOUNS.put("means", "means");
            IRREGULAR_NOUNS.put("medium", XHtmlWriter.MEDIA);
            IRREGULAR_NOUNS.put("memorandum", "memoranda");
            IRREGULAR_NOUNS.put("millennium", "milennia");
            IRREGULAR_NOUNS.put("moose", "moose");
            IRREGULAR_NOUNS.put("mosquito", "mosquitoes");
            IRREGULAR_NOUNS.put("mouse", "mice");
            IRREGULAR_NOUNS.put("movie", YahooNews.MOVIES);
            IRREGULAR_NOUNS.put("neurosis", "neuroses");
            IRREGULAR_NOUNS.put("news", "news");
            IRREGULAR_NOUNS.put("nucleus", "nuclei");
            IRREGULAR_NOUNS.put("oasis", "oases");
            IRREGULAR_NOUNS.put("ovum", "ova");
            IRREGULAR_NOUNS.put("ox", "oxen");
            IRREGULAR_NOUNS.put("paralysis", "paralyses");
            IRREGULAR_NOUNS.put("parenthesis", "parentheses");
            IRREGULAR_NOUNS.put("person", "people");
            IRREGULAR_NOUNS.put("phenomenon", "phenomena");
            IRREGULAR_NOUNS.put("pike", "pike");
            IRREGULAR_NOUNS.put("potato", "potatoes");
            IRREGULAR_NOUNS.put("radius", "radiuses");
            IRREGULAR_NOUNS.put("salmon", "salmon");
            IRREGULAR_NOUNS.put("scissors", "scissors");
            IRREGULAR_NOUNS.put("series", "series");
            IRREGULAR_NOUNS.put("service", "services");
            IRREGULAR_NOUNS.put("sheep", "sheep");
            IRREGULAR_NOUNS.put("shelf", "shelves");
            IRREGULAR_NOUNS.put("shrimp", "shrimp");
            IRREGULAR_NOUNS.put("species", "species");
            IRREGULAR_NOUNS.put("status", "status");
            IRREGULAR_NOUNS.put("stimulus", "stimuli");
            IRREGULAR_NOUNS.put("stratum", "strata");
            IRREGULAR_NOUNS.put("swine", "swine");
            IRREGULAR_NOUNS.put("syllabus", "syllabuses");
            IRREGULAR_NOUNS.put("symposium", "symposia");
            IRREGULAR_NOUNS.put("synthesis", "syntheses");
            IRREGULAR_NOUNS.put("synopsis", "synopses");
            IRREGULAR_NOUNS.put("tableau", "tableaux");
            IRREGULAR_NOUNS.put("thesis", "theses");
            IRREGULAR_NOUNS.put("thief", "thieves");
            IRREGULAR_NOUNS.put("tomato", "tomatoes");
            IRREGULAR_NOUNS.put("tooth", "teeth");
            IRREGULAR_NOUNS.put("torpedo", "torpedoes");
            IRREGULAR_NOUNS.put("trout", "trout");
            IRREGULAR_NOUNS.put("vertebra", "vertebrae");
            IRREGULAR_NOUNS.put("vertex", "vertices");
            IRREGULAR_NOUNS.put("veto", "vetoes");
            IRREGULAR_NOUNS.put("vita", "vitae");
            IRREGULAR_NOUNS.put("wife", "wives");
            IRREGULAR_NOUNS.put("wolf", "wolves");
            IRREGULAR_NOUNS.put("woman", "women");
        } catch (Throwable th) {
            FileHelper.close(inputStream);
            throw th;
        }
    }
}
