package ws.palladian.retrieval.analysis;

import java.util.ArrayList;
import java.util.List;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.nlp.StringHelper;
import ws.palladian.retrieval.DocumentRetriever;
import ws.palladian.retrieval.HttpRetriever;
import ws.palladian.retrieval.HttpRetrieverFactory;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/retrieval/analysis/SitemapRetriever.class */
public class SitemapRetriever {
    /**
     * Regular expression matching the text enclosed by {@code loc>} and {@code </loc},
     * i.e. the content of a {@code <loc>} element. Used for both the index document
     * and every downloaded sitemap file.
     */
    private static final String LOC_CONTENT_REGEX = "(?<=loc\\>).*?(?=\\</loc)";

    /**
     * Collects the {@code <loc>} entries of all sitemap files referenced by the document at the given URL.
     *
     * <p>The text retrieved from {@code str} is scanned for {@code <loc>} contents. Each match is
     * treated as the URL of a gzip-compressed file: it is downloaded to
     * {@code data/temp/sitemap<N>.xml.compressed} (N counting up from 1), decompressed to
     * {@code data/temp/sitemap<N>.xml}, and the {@code <loc>} contents of the decompressed file
     * are appended to the result.</p>
     *
     * <p>NOTE(review): the downloaded and decompressed files are not removed by this method, and
     * failures of the download or decompression step are not handled here. The method presumably
     * expects {@code str} to point at a sitemap index whose entries are gzipped sitemaps —
     * confirm against callers.</p>
     *
     * @param str URL of the document listing the sitemap files
     * @return the {@code <loc>} contents found in all downloaded sitemap files, in encounter order
     */
    public List<String> getUrls(String str) {
        // Typed as List<String> (instead of a raw ArrayList) so addAll and the return are checked.
        List<String> pageUrls = CollectionHelper.newArrayList();
        HttpRetriever httpRetriever = HttpRetrieverFactory.getHttpRetriever();
        String sitemapIndex = new DocumentRetriever(httpRetriever).getText(str);

        int sitemapNumber = 1;
        for (String sitemapUrl : StringHelper.getRegexpMatches(LOC_CONTENT_REGEX, sitemapIndex)) {
            String compressedPath = "data/temp/sitemap" + sitemapNumber + ".xml.compressed";
            // Same path without the ".compressed" suffix is the decompression target.
            String uncompressedPath = compressedPath.replace(".compressed", "");

            httpRetriever.downloadAndSave(sitemapUrl, compressedPath);
            FileHelper.ungzipFile(compressedPath, uncompressedPath);

            String sitemapContent = FileHelper.readFileToString(uncompressedPath);
            pageUrls.addAll(StringHelper.getRegexpMatches(LOC_CONTENT_REGEX, sitemapContent));
            sitemapNumber++;
        }
        return pageUrls;
    }
}
