package ws.palladian.extraction.date;

import com.aliasi.xml.XHtmlWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.Validate;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import ws.palladian.extraction.date.comparators.RatedDateComparator;
import ws.palladian.extraction.date.dates.ContentDate;
import ws.palladian.extraction.date.dates.RatedDate;
import ws.palladian.extraction.date.getter.ContentDateGetter;
import ws.palladian.extraction.date.helper.DateExtractionHelper;
import ws.palladian.extraction.date.rater.ContentDateRater;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.date.DateParser;
import ws.palladian.helper.date.ExtractedDate;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.helper.html.XPathHelper;
import ws.palladian.retrieval.HttpException;
import ws.palladian.retrieval.HttpRetrieverFactory;
import ws.palladian.retrieval.parser.ParserException;
import ws.palladian.retrieval.parser.ParserFactory;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/date/WebPageDateEvaluator.class */
/**
 * Static entry points for extracting and rating dates found in a web page.
 *
 * <p>The class is a pure utility holder: it has no state and cannot be instantiated.
 */
public final class WebPageDateEvaluator {
    private WebPageDateEvaluator() {
        // utility class, no instances
    }

    /**
     * Extracts the content dates of a document, rates them and returns them sorted with
     * {@link RatedDateComparator#INSTANCE}.
     *
     * @param document the parsed page, must not be {@code null}
     * @param pageDateType the kind of page date to rate for, must not be {@code null}
     * @return the rated dates in the order produced by {@link RatedDateComparator#INSTANCE}
     */
    public static List<RatedDate<ExtractedDate>> getDates(Document document, PageDateType pageDateType) {
        Validate.notNull(document, "document must not be null");
        Validate.notNull(pageDateType, "type must not be null");
        ContentDateGetter contentDateGetter = new ContentDateGetter();
        List<RatedDate<ExtractedDate>> ratedDates = rate(contentDateGetter.getDates(document), pageDateType);
        Collections.sort(ratedDates, RatedDateComparator.INSTANCE);
        return ratedDates;
    }

    /**
     * Returns the first entry of {@link #getDates(Document, PageDateType)}, i.e. the date that
     * sorts first according to {@link RatedDateComparator#INSTANCE}.
     *
     * @param document the parsed page, must not be {@code null}
     * @param pageDateType the kind of page date to rate for, must not be {@code null}
     * @return the first rated date, or {@code null} if no date was found
     */
    public static RatedDate<ExtractedDate> getBestDate(Document document, PageDateType pageDateType) {
        Validate.notNull(document, "document must not be null");
        Validate.notNull(pageDateType, "type must not be null");
        List<RatedDate<ExtractedDate>> ratedDates = getDates(document, pageDateType);
        return ratedDates.isEmpty() ? null : ratedDates.get(0);
    }

    /**
     * Looks for a {@code <time>} element carrying a {@code pubdate} attribute and parses the
     * value of its date-time attribute.
     *
     * <p>The search is restricted to the {@code <article>} element with the longest inner XML;
     * if the document contains no {@code <article>} element, the whole document is searched.
     *
     * @param document the parsed page, must not be {@code null}
     * @return the result of {@link DateParser#findDate(String)} for the first matching
     *         {@code <time>} element, or {@code null} if there is no such element
     */
    public static ExtractedDate getBestPubDateHtml5(Document document) {
        Validate.notNull(document, "document must not be null");

        // Start with the whole document as the search scope; any <article> found replaces it
        // (every inner-XML length is >= 0 and therefore beats the initial -1).
        Node searchScope = document;
        int longestLength = -1;
        for (Node article : XPathHelper.getXhtmlNodes(document, "//article")) {
            int length = HtmlHelper.getInnerXml(article).length();
            if (length > longestLength) {
                searchScope = article;
                longestLength = length;
            }
        }

        for (Node timeNode : XPathHelper.getXhtmlNodes(searchScope, ".//time")) {
            NamedNodeMap attributes = timeNode.getAttributes();
            if (attributes == null) {
                // Node.getAttributes() is only non-null for element nodes.
                continue;
            }
            if (attributes.getNamedItem("pubdate") == null) {
                continue;
            }
            // NOTE(review): XHtmlWriter.DATETIME is presumably the attribute name "datetime" - confirm.
            Node dateTimeAttribute = attributes.getNamedItem(XHtmlWriter.DATETIME);
            if (dateTimeAttribute != null) {
                return DateParser.findDate(dateTimeAttribute.getTextContent());
            }
        }
        return null;
    }

    /**
     * Downloads and parses the page at the given URL and delegates to
     * {@link #getBestDate(Document, PageDateType)}.
     *
     * <p>This is a best-effort lookup: a failure while retrieving or parsing the page is not
     * propagated but reported as "no date" ({@code null}).
     *
     * @param str the URL of the page, must not be empty
     * @param pageDateType the kind of page date to rate for, must not be {@code null}
     * @return the first rated date, or {@code null} if none was found or the page could not be
     *         retrieved or parsed
     */
    public static RatedDate<ExtractedDate> getBestDate(String str, PageDateType pageDateType) {
        Validate.notEmpty(str, "url must not be empty");
        Validate.notNull(pageDateType, "type must not be null");
        try {
            Document document = ParserFactory.createHtmlParser().parse(HttpRetrieverFactory.getHttpRetriever().httpGet(str));
            return getBestDate(document, pageDateType);
        } catch (HttpException ignored) {
            // Deliberately swallowed: an unreachable page yields no date.
            return null;
        } catch (ParserException ignored) {
            // Deliberately swallowed: an unparsable page yields no date.
            return null;
        }
    }

    /**
     * Filters the given dates and rates the remaining ones with a {@link ContentDateRater}.
     *
     * <p>The filter chain is, in order: {@code DateExtractionHelper.filterByRange}, restriction
     * to {@link ContentDate} instances, and {@code DateExtractionHelper.filterFullDate}.
     *
     * @param list the extracted dates to rate, must not be {@code null}
     * @param pageDateType the kind of page date to rate for, must not be {@code null}
     * @return a new list holding the rated dates
     */
    public static List<RatedDate<ExtractedDate>> rate(List<? extends ExtractedDate> list, PageDateType pageDateType) {
        Validate.notNull(list, "extractedDates must not be null");
        Validate.notNull(pageDateType, "type must not be null");
        // NOTE(review): the list is intentionally left raw; the generic type returned by
        // ContentDateRater.rate is not visible here and may not be assignable to
        // RatedDate<ExtractedDate> without an unchecked conversion - confirm before adding type arguments.
        ArrayList ratedDates = CollectionHelper.newArrayList();
        ContentDateRater contentDateRater = new ContentDateRater(pageDateType);
        ratedDates.addAll(
                contentDateRater.rate(
                        DateExtractionHelper.filterFullDate(
                                DateExtractionHelper.filter(DateExtractionHelper.filterByRange(list), ContentDate.class))));
        return ratedDates;
    }
}
