package ws.palladian.retrieval.search.web;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import ws.palladian.helper.UrlHelper;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.constants.Language;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.retrieval.HttpException;
import ws.palladian.retrieval.parser.json.JsonArray;
import ws.palladian.retrieval.parser.json.JsonException;
import ws.palladian.retrieval.parser.json.JsonObject;
import ws.palladian.retrieval.search.SearcherException;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/retrieval/search/web/WikipediaSearcher.class */
public final class WikipediaSearcher extends WebSearcher<WebResult> {
    private static final String NAME = "Wikipedia";
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    @Override // ws.palladian.retrieval.search.Searcher
    public String getName() {
        return NAME;
    }

    @Override // ws.palladian.retrieval.search.web.WebSearcher
    public List<WebResult> search(String str, int i, Language language) throws SearcherException {
        ArrayList newArrayList = CollectionHelper.newArrayList();
        String baseUrl = getBaseUrl(language);
        for (int i2 = 0; i2 < i; i2 += 50) {
            try {
                JsonArray queryJsonArray = fetchJsonResponse(str, baseUrl, i2, 50).queryJsonArray("/query/search");
                if (queryJsonArray.size() == 0) {
                    break;
                }
                Iterator<Object> it = queryJsonArray.iterator();
                while (it.hasNext()) {
                    JsonObject jsonObject = (JsonObject) it.next();
                    String string = jsonObject.getString("title");
                    newArrayList.add(new WebResult(getPageUrl(baseUrl, string), string, HtmlHelper.stripHtmlTags(jsonObject.getString("snippet")), parseDate(jsonObject.getString("timestamp")), NAME));
                    if (newArrayList.size() == i) {
                        break;
                    }
                }
            } catch (Exception e) {
                throw new SearcherException("JSON parse error: " + e.getMessage(), e);
            }
        }
        return newArrayList;
    }

    private JsonObject fetchJsonResponse(String str, String str2, int i, int i2) throws SearcherException {
        String queryUrl = getQueryUrl(str2, str, i, i2);
        try {
            String stringContent = this.retriever.httpGet(queryUrl).getStringContent();
            try {
                return new JsonObject(stringContent);
            } catch (JsonException e) {
                throw new SearcherException("JSON parse error while parsing \"" + stringContent + "\": " + e.getMessage(), e);
            }
        } catch (HttpException e2) {
            throw new SearcherException("HTTP error while accessing \"" + queryUrl + "\": " + e2.getMessage(), e2);
        }
    }

    @Override // ws.palladian.retrieval.search.web.WebSearcher
    public long getTotalResultCount(String str, Language language) throws SearcherException {
        try {
            return fetchJsonResponse(str, getBaseUrl(language), 0, 1).queryLong("/query/searchinfo/totalhits");
        } catch (JsonException e) {
            throw new SearcherException("Error while getting the result count.");
        }
    }

    private Date parseDate(String str) {
        if (str == null) {
            return null;
        }
        try {
            return new SimpleDateFormat(DATE_PATTERN).parse(str);
        } catch (ParseException e) {
            return null;
        }
    }

    private String getQueryUrl(String str, String str2, int i, int i2) {
        StringBuilder sb = new StringBuilder();
        sb.append(str);
        sb.append("/w/api.php");
        sb.append("?action=query");
        sb.append("&list=search");
        sb.append("&format=json");
        if (i > 0) {
            sb.append("&sroffset=").append(i);
        }
        sb.append("&srlimit=").append(i2);
        sb.append("&srsearch=").append(UrlHelper.encodeParameter(str2));
        return sb.toString();
    }

    private String getPageUrl(String str, String str2) {
        return str + "/wiki/" + StringHelper.upperCaseFirstLetter(str2).replaceAll("\\s+$", "").replaceAll("\\s+", "_");
    }

    private String getBaseUrl(Language language) {
        switch (language) {
            case GERMAN:
                return "http://de.wikipedia.org";
            default:
                return "http://en.wikipedia.org";
        }
    }

    public static void main(String[] strArr) throws SearcherException {
        CollectionHelper.print(new WikipediaSearcher().search("dresden", 500, Language.GERMAN));
    }
}
