package ws.palladian.extraction.content;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import ws.palladian.helper.UrlHelper;
import ws.palladian.retrieval.HttpException;
import ws.palladian.retrieval.HttpRetriever;
import ws.palladian.retrieval.HttpRetrieverFactory;
import ws.palladian.retrieval.helper.JsonObjectWrapper;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/content/ExtractivContentExtractor.class */
/**
 * Content extractor that delegates the extraction to the Extractiv REST service: the URL of the
 * page is sent to the service and the {@code text} and {@code title} entries of the
 * {@code Document} object in the JSON response are kept as the extraction result.
 *
 * <p>Only the URL is transmitted; this class never uploads document content itself. A DOM result
 * node is not available, see {@link #getResultNode()}.</p>
 */
public class ExtractivContentExtractor extends WebPageContentExtractor {
    /** Request template for the Extractiv service; {@code %s} receives the encoded page URL. */
    private static final String REQUEST_URL_TEMPLATE = "http://rest.extractiv.com/extractiv/?url=%s&output_format=json";

    /** Title taken from the last successful service response; empty until then. */
    private String extractedTitle = "";

    /** Text taken from the last successful service response; empty until then. */
    private String extractedResult = "";

    private final HttpRetriever httpRetriever = HttpRetrieverFactory.getHttpRetriever();

    /**
     * Sends the given URL to the Extractiv service and stores the text and title from the response.
     *
     * <p>The stored text and title are replaced together, and only after both values have been
     * read from the response. If any step throws, the previously stored result stays untouched,
     * so {@link #getResultText()} never exposes the raw service response or a text/title pair
     * from two different requests.</p>
     *
     * @param str URL of the web page to extract
     * @return this extractor
     * @throws PageContentExtractorException if the HTTP request to the service fails
     */
    @Override
    public WebPageContentExtractor setDocument(String str) throws PageContentExtractorException {
        try {
            String responseBody = this.httpRetriever.httpGet(buildRequestUrl(str)).getStringContent();
            JsonObjectWrapper response = new JsonObjectWrapper(responseBody);
            String text = response.getJSONObject("Document").getString("text");
            String title = response.getJSONObject("Document").getString("title");

            // Commit both values only now, so a failure above cannot leave partial state behind.
            this.extractedResult = text;
            this.extractedTitle = title;
            return this;
        } catch (HttpException e) {
            throw new PageContentExtractorException("Error when contacting API for URL \"" + str + "\": " + e.getMessage(), e);
        }
    }

    /**
     * Extracts the page identified by the document's URI; the document content itself is not used.
     *
     * <p>NOTE(review): {@code getDocumentURI()} may return {@code null} according to the DOM API;
     * that value is passed on unchanged - confirm how {@code UrlHelper.encodeParameter} treats it.</p>
     *
     * @param document document whose URI is sent to the service
     * @return this extractor
     * @throws PageContentExtractorException if the HTTP request to the service fails
     */
    @Override
    public WebPageContentExtractor setDocument(Document document) throws PageContentExtractorException {
        return setDocument(document.getDocumentURI());
    }

    /** Builds the service request URL for the given page URL, encoding it as a query parameter. */
    private String buildRequestUrl(String str) {
        return String.format(REQUEST_URL_TEMPLATE, UrlHelper.encodeParameter(str));
    }

    /**
     * Not supported by this extractor.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Node getResultNode() {
        throw new UnsupportedOperationException("The ExtractivContentExtractor does not support this method");
    }

    /**
     * @return the text stored by the last successful {@code setDocument} call, or an empty string
     *         if there was none
     */
    @Override
    public String getResultText() {
        return this.extractedResult;
    }

    /**
     * @return the title stored by the last successful {@code setDocument} call, or an empty string
     *         if there was none
     */
    @Override
    public String getResultTitle() {
        return this.extractedTitle;
    }

    /** @return the display name of this extractor */
    @Override
    public String getExtractorName() {
        return "Extractiv Content Extractor";
    }

    /**
     * Manual demo: extracts a fixed sample page and prints its title and text to standard output.
     *
     * @param strArr unused
     */
    public static void main(String[] strArr) {
        ExtractivContentExtractor extractivContentExtractor = new ExtractivContentExtractor();
        String resultText = extractivContentExtractor.getResultText("http://www.bbc.com/travel/feature/20121108-irelands-outlying-islands");
        System.out.println("title: " + extractivContentExtractor.getResultTitle());
        System.out.println("text: " + resultText);
    }
}
