package ws.palladian.retrieval.feeds.discovery;

import com.aliasi.util.Strings;
import edu.stanford.nlp.ling.CoreLabel;
import java.util.ArrayList;
import java.util.List;
import ws.palladian.helper.collection.CountMap;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/retrieval/feeds/discovery/QueryCompiler.class */
/**
 * Compiles a list of query terms from a text file by splitting every line into segments and
 * counting how often each segment occurs.
 *
 * <p>NOTE(review): the method names suggest the input is a DMOZ category listing (one category
 * path per line); the code itself only relies on the line/segment structure described on
 * {@link #readQueriesFromDmoz(String, int, int)}.
 */
public class QueryCompiler {
    /** Input file processed by {@link #main(String[])}. */
    private static final String CATEGORIES_FILE = "/home/pk/Desktop/categories.txt";

    /** Output file written by {@link #main(String[])}. */
    private static final String QUERIES_FILE = "/home/pk/Desktop/newsseecrQueries_2011-08-04.txt";

    /** A progress value is printed whenever the per-line counter is a multiple of this. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Reads the categories file using a count threshold of {@code -1} and at most the first two
     * segments per line, prints the resulting list and writes it to the queries file.
     *
     * @param strArr command line arguments; not used
     */
    public static void main(String[] strArr) {
        List<String> queries = readQueriesFromDmoz(CATEGORIES_FILE, -1, 2);
        System.out.println(queries);
        FileHelper.writeToFile(QUERIES_FILE, queries);
    }

    /**
     * Convenience overload of {@link #readQueriesFromDmoz(String, int, int)} that uses a count
     * threshold of {@code -1} and {@link Integer#MAX_VALUE} as the per-line segment limit, i.e.
     * every segment of every line is considered.
     *
     * @param str path of the file to read
     * @return the collected segments
     */
    public static List<String> readQueriesFromDmoz(String str) {
        return readQueriesFromDmoz(str, -1, Integer.MAX_VALUE);
    }

    /**
     * Reads the given file line by line, counts the segments found and returns those whose count
     * exceeds the given threshold.
     *
     * <p>For each line, every {@code "_"} is replaced by {@code Strings.SINGLE_SPACE_STRING} and
     * the result is split using {@code CoreLabel.TAG_SEPARATOR} as the pattern passed to
     * {@link String#split(String)}. Only the first {@code i2} segments of a line are looked at,
     * and segments of length 0 or 1 are ignored. As a side effect, progress values and the final
     * number of items are printed to {@code System.out}.
     *
     * @param str path of the file to read
     * @param i count threshold; a segment is returned only if its count is strictly greater
     * @param i2 maximum number of leading segments considered per line
     * @return the segments whose count is greater than {@code i}, in the iteration order of the
     *         count map's unique items
     */
    public static List<String> readQueriesFromDmoz(String str, int i, final int i2) {
        final CountMap<String> segmentCounts = CountMap.create();
        FileHelper.performActionOnEveryLine(str, new LineAction() {
            @Override
            public void performAction(String line, int lineNumber) {
                // NOTE(review): both constants come from unrelated NLP libraries; presumably they
                // are decompiler substitutions for plain string literals - confirm their values
                // before replacing them.
                String[] segments = line.replace("_", Strings.SINGLE_SPACE_STRING).split(CoreLabel.TAG_SEPARATOR);
                // The limit does not change while iterating, so compute it once per line.
                int limit = Math.min(i2, segments.length);
                for (int index = 0; index < limit; index++) {
                    String segment = segments[index];
                    if (segment.length() > 1) {
                        segmentCounts.add(segment);
                    }
                }
                // lineNumber is whatever FileHelper passes as second argument (presumably the
                // number of the current line - verify against FileHelper).
                if (lineNumber % PROGRESS_INTERVAL == 0) {
                    System.out.println(lineNumber);
                }
            }
        });
        List<String> queries = new ArrayList<>();
        for (String segment : segmentCounts.uniqueItems()) {
            if (segmentCounts.getCount(segment) > i) {
                queries.add(segment);
            }
        }
        System.out.println("# items " + queries.size());
        return queries;
    }
}
