package ws.palladian.retrieval.wikipedia;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.helper.collection.CollectionHelper;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/retrieval/wikipedia/WikipediaPage.class */
/**
 * Immutable holder for one Wikipedia page: its numeric page id, namespace id, title and raw text.
 * Most accessors delegate parsing of the text to {@code WikipediaUtil}; categories, the infobox
 * type and the disambiguation check are derived here with regular expressions / substring tests.
 *
 * <p>The constructor does not validate its arguments. Methods that dereference {@link #getTitle()}
 * or {@link #getText()} directly (e.g. {@link #isDisambiguation()}, {@link #getCategories()}) throw
 * a {@link NullPointerException} if the corresponding value is {@code null}.</p>
 */
public class WikipediaPage {
    private static final Logger LOGGER = LoggerFactory.getLogger(WikipediaPage.class);

    /** Namespace id {@code 0}; by its name, the id of the main (article) namespace. */
    public static final int MAIN_NAMESPACE = 0;

    /**
     * Applied to lower-cased template markup: after {@code infobox} or {@code geobox} and one
     * whitespace or pipe character, captures everything up to the next {@code |}, {@code <} or
     * {@code }}. Compiled once instead of on every call.
     */
    private static final Pattern INFOBOX_TYPE_PATTERN = Pattern.compile("(?:infobox|geobox)[\\s|]([^|<}]+)");

    /**
     * Matches {@code [[Category:Name]]} and {@code [[Category:Name|sortkey]]}; group 1 is the part
     * between the colon and the first {@code |} or {@code ]}. Compiled once instead of on every call.
     */
    private static final Pattern CATEGORY_PATTERN = Pattern.compile("\\[\\[Category:([^|\\]]*)(?:\\|[^|\\]]*)?\\]\\]");

    private final int pageId;
    private final int namespaceId;
    private final String title;
    private final String text;

    /**
     * A named infobox together with its key/value entries.
     */
    public static class WikipediaInfobox {
        private final String name;
        private final Map<String, String> content;

        /**
         * @param name    The infobox name/type; may be {@code null} (see
         *                {@link WikipediaPage#getInfoboxes()}, which passes {@code null} when no type
         *                could be extracted).
         * @param content The entries of the infobox. The map is stored as given, not copied.
         */
        public WikipediaInfobox(String name, Map<String, String> content) {
            this.name = name;
            this.content = content;
        }

        /**
         * @return The name given at construction.
         */
        public String getName() {
            return this.name;
        }

        /**
         * Looks up a single entry.
         *
         * @param key The entry key.
         * @return The value stored for {@code key} in the content map, or {@code null} if the map
         *         has no mapping for it.
         */
        public String getEntry(String key) {
            return this.content.get(key);
        }

        /**
         * Looks up an entry by several candidate keys, delegating to
         * {@code CollectionHelper.getTrying}.
         *
         * @param keys The candidate keys.
         * @return Whatever {@code CollectionHelper.getTrying} yields for the content map and
         *         {@code keys} (presumably the value of the first key that is present; verify
         *         against {@code CollectionHelper}).
         */
        public String getEntry(String... keys) {
            return (String) CollectionHelper.getTrying(this.content, keys);
        }

        /**
         * @return The number of entries in the content map.
         */
        public int size() {
            return this.content.size();
        }

        @Override
        public String toString() {
            return "WikipediaInfobox [name=" + this.name + ", content=" + this.content + "]";
        }
    }

    /**
     * A link consisting of a destination and an optional title.
     */
    public static class WikipediaLink {
        private final String destination;
        private final String title;

        /**
         * @param destination The link destination.
         * @param title       The link title; {@code null} is tolerated by {@link #toString()}.
         */
        public WikipediaLink(String destination, String title) {
            this.destination = destination;
            this.title = title;
        }

        /**
         * @return The destination given at construction.
         */
        public String getDestination() {
            return this.destination;
        }

        /**
         * @return The title given at construction, possibly {@code null}.
         */
        public String getTitle() {
            return this.title;
        }

        /**
         * @return A debug representation; the {@code title} part is omitted when the title is
         *         {@code null}.
         */
        @Override
        public String toString() {
            StringBuilder description = new StringBuilder("WikipediaLink [");
            description.append("destination=").append(this.destination);
            if (this.title != null) {
                description.append(", title=").append(this.title);
            }
            description.append("]");
            return description.toString();
        }
    }

    /**
     * Creates a page. No argument is validated or copied.
     *
     * @param pageId      The page id.
     * @param namespaceId The namespace id (compare with {@link #MAIN_NAMESPACE}).
     * @param title       The page title.
     * @param text        The raw page text.
     */
    public WikipediaPage(int pageId, int namespaceId, String title, String text) {
        this.pageId = pageId;
        this.namespaceId = namespaceId;
        this.title = title;
        this.text = text;
    }

    /**
     * @return The page id given at construction.
     */
    public int getPageId() {
        return this.pageId;
    }

    /**
     * @return The namespace id given at construction.
     */
    public int getNamespaceId() {
        return this.namespaceId;
    }

    /**
     * @return The title given at construction.
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @return The raw text given at construction.
     */
    public String getText() {
        return this.text;
    }

    /**
     * @return The result of {@code WikipediaUtil.getSections} for this page's text.
     */
    public List<String> getSections() {
        return WikipediaUtil.getSections(this.text);
    }

    /**
     * @return {@code true} if {@link #getRedirectTitle()} is non-{@code null}.
     */
    public boolean isRedirect() {
        return getRedirectTitle() != null;
    }

    /**
     * @return The result of {@code WikipediaUtil.getRedirect} for this page's text;
     *         {@link #isRedirect()} treats {@code null} as "not a redirect".
     */
    public String getRedirectTitle() {
        return WikipediaUtil.getRedirect(this.text);
    }

    /**
     * Returns the first markup block named {@code infobox}, as found by
     * {@code WikipediaUtil.getNamedMarkup}. A {@link StringIndexOutOfBoundsException} raised while
     * extracting is logged as a warning and turned into {@code null}.
     *
     * @return The first infobox markup, or {@code null} if extraction failed (and presumably also
     *         when there is none; that depends on {@code CollectionHelper.getFirst}).
     * @deprecated Prefer {@link #getInfoboxes()}, which processes every {@code infobox} and
     *             {@code geobox} block of the page.
     */
    @Deprecated
    public String getInfoboxMarkup() {
        try {
            return (String) CollectionHelper.getFirst(WikipediaUtil.getNamedMarkup(this.text, "infobox"));
        } catch (StringIndexOutOfBoundsException e) {
            logInvalidMarkup(e);
            return null;
        }
    }

    /**
     * Returns the type of the markup obtained from {@link #getInfoboxMarkup()}.
     *
     * @return The lower-cased, trimmed infobox type, or {@code null} if there is no infobox markup
     *         or no type could be extracted from it.
     * @deprecated Prefer {@link #getInfoboxes()} and {@link WikipediaInfobox#getName()}.
     */
    @Deprecated
    public String getInfoboxType() {
        String markup = getInfoboxMarkup();
        return markup == null ? null : getInfoboxType(markup);
    }

    /**
     * Extracts the infobox/geobox type from template markup.
     *
     * @param markup The template markup, not {@code null}.
     * @return The trimmed, lower-cased type, or {@code null} if the pattern does not match.
     */
    private static String getInfoboxType(String markup) {
        // Locale.ROOT keeps the result independent of the default locale; with e.g. a Turkish
        // default locale, "Infobox" would lower-case to "ınfobox" and never match the pattern.
        Matcher matcher = INFOBOX_TYPE_PATTERN.matcher(markup.toLowerCase(Locale.ROOT));
        if (!matcher.find()) {
            return null;
        }
        return matcher.group(1).trim();
    }

    /**
     * Logs the warning shared by the infobox accessors when markup extraction fails.
     */
    private static void logInvalidMarkup(StringIndexOutOfBoundsException e) {
        LOGGER.warn("{} when getting infobox markup; this is usually caused by invalid markup.", e.getMessage());
    }

    /**
     * Builds one {@link WikipediaInfobox} per markup block named {@code infobox} or {@code geobox},
     * using the extracted type as name and {@code WikipediaUtil.extractTemplate} for the entries.
     * A {@link StringIndexOutOfBoundsException} during extraction is logged as a warning; the
     * infoboxes collected up to that point are still returned.
     *
     * @return The infoboxes found, possibly empty, never {@code null}.
     */
    public List<WikipediaInfobox> getInfoboxes() {
        List<WikipediaInfobox> infoboxes = new ArrayList<WikipediaInfobox>();
        try {
            for (String markup : WikipediaUtil.getNamedMarkup(this.text, "infobox", "geobox")) {
                infoboxes.add(new WikipediaInfobox(getInfoboxType(markup), WikipediaUtil.extractTemplate(markup)));
            }
        } catch (StringIndexOutOfBoundsException e) {
            logInvalidMarkup(e);
        }
        return infoboxes;
    }

    /**
     * @return The result of {@code WikipediaUtil.cleanTitle} for this page's title.
     */
    public String getCleanTitle() {
        return WikipediaUtil.cleanTitle(this.title);
    }

    /**
     * Collects the names of all {@code [[Category:...]]} links in the text, in order of occurrence.
     * An optional {@code |sortkey} part is not included in the name. Matching is case-sensitive.
     *
     * @return The category names, possibly empty, never {@code null}.
     */
    public List<String> getCategories() {
        List<String> categories = new ArrayList<String>();
        Matcher matcher = CATEGORY_PATTERN.matcher(this.text);
        while (matcher.find()) {
            categories.add(matcher.group(1));
        }
        return categories;
    }

    /**
     * Checks whether this page looks like a disambiguation page: either the title ends with
     * {@code (disambiguation)}, or the text contains, ignoring case, one of the template prefixes
     * <code>{{disambig</code>, <code>{{hndis</code> or <code>{{geodis</code>.
     *
     * @return {@code true} if one of the above conditions holds.
     */
    public boolean isDisambiguation() {
        if (this.title.endsWith("(disambiguation)")) {
            return true;
        }
        // Locale.ROOT: lower-casing must not depend on the default locale (see getInfoboxType).
        String lowerCaseText = this.text.toLowerCase(Locale.ROOT);
        return lowerCaseText.contains("{{disambig")
                || lowerCaseText.contains("{{hndis")
                || lowerCaseText.contains("{{geodis");
    }

    /**
     * @return The result of {@code WikipediaUtil.getLinks} for this page's text.
     */
    public List<WikipediaLink> getLinks() {
        return WikipediaUtil.getLinks(this.text);
    }

    /**
     * @return A debug representation with page id, namespace id, title and redirect title; the
     *         page text is not included.
     */
    @Override
    public String toString() {
        return "WikipediaPage [pageId=" + this.pageId + ", namespaceId=" + this.namespaceId + ", title=" + this.title
                + ", redirectTitle=" + getRedirectTitle() + "]";
    }
}
