package ws.palladian.extraction.feature;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import ws.palladian.helper.collection.CountMap;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/TermCorpus.class */
/**
 * Corpus-level term statistics: a per-term counter together with the number of documents that
 * contributed to it. The statistics can be persisted to and restored from a gzip-compressed,
 * line-oriented text file (see {@link #save(File)} and {@link #load(String)}).
 */
public final class TermCorpus {
    /** Delimiter between key and value on every line of the serialized form. */
    private static final String SEPARATOR = "#";

    /** Number of documents added so far, or the value read from a loaded file. */
    private int numDocs;

    /** Counter backing all per-term queries. */
    private final CountMap<String> terms;

    /** Creates an empty corpus with no terms and a document count of zero. */
    public TermCorpus() {
        this(CountMap.create(), 0);
    }

    /**
     * Creates a corpus on top of existing statistics.
     *
     * @param countMap the term counter to use; it is stored as-is (not copied)
     * @param i the initial number of documents
     */
    public TermCorpus(CountMap<String> countMap, int i) {
        this.numDocs = i;
        this.terms = countMap;
    }

    /**
     * Registers one document: passes its terms to the counter and increments the document count.
     *
     * @param set the distinct terms of the document
     */
    public void addTermsFromDocument(Set<String> set) {
        this.terms.addAll(set);
        this.numDocs++;
    }

    /**
     * Returns the count stored for a term.
     *
     * @param str the term to look up
     * @return the count reported by the underlying counter for {@code str}
     */
    public int getCount(String str) {
        return this.terms.getCount(str);
    }

    /**
     * Computes the ratio of the document count to the count of the given term. No logarithm is
     * applied.
     *
     * <p>The division is carried out in floating point so that the fractional part of the ratio
     * is kept. As a consequence, a term with a count of zero and {@code z == false} yields
     * {@link Double#POSITIVE_INFINITY} (or {@link Double#NaN} if the document count is zero as
     * well) instead of raising an {@link ArithmeticException}.
     *
     * @param str the term to look up
     * @param z if {@code true}, one is added to both numerator and denominator (smoothing), which
     *          avoids a zero denominator for unknown terms
     * @return {@code (numDocs + s) / (count(str) + s)} with {@code s} being 1 or 0
     */
    public double getIdf(String str, boolean z) {
        int smoothing = z ? 1 : 0;
        // Cast before dividing: int / int would truncate the ratio before widening to double.
        return (double) (getNumDocs() + smoothing) / (getCount(str) + smoothing);
    }

    /**
     * @return the number of documents in this corpus
     */
    public int getNumDocs() {
        return this.numDocs;
    }

    /**
     * @return the total size reported by the underlying counter
     */
    public int getNumTerms() {
        return this.terms.totalSize();
    }

    /**
     * @return the unique size reported by the underlying counter
     */
    public int getNumUniqueTerms() {
        return this.terms.uniqueSize();
    }

    /**
     * Overwrites the count stored for a term. Used while loading a serialized corpus.
     *
     * @param str the term
     * @param i the count to store for the term
     */
    public void setDf(String str, int i) {
        this.terms.set(str, i);
    }

    /**
     * Reads statistics from a gzip-compressed file in the format written by {@link #save(File)}.
     * Term counts found in the file are set on this corpus; existing entries are not cleared
     * beforehand.
     *
     * <p>Lines with a line number of at most 1 are treated as header lines: a line starting with
     * {@code numDocs#} sets the document count. All later lines are expected to be
     * {@code term#count}; lines that do not split into exactly two parts at the separator are
     * skipped, which includes terms that themselves contain the separator.
     *
     * @param str path of the file to read
     * @throws IOException if the file cannot be opened or is not valid gzip data
     */
    public void load(String str) throws IOException {
        FileInputStream fileIn = null;
        GZIPInputStream gzipIn = null;
        try {
            // Keep a handle on the raw stream: the GZIPInputStream constructor reads the header
            // and may throw, in which case the file stream would otherwise never be closed.
            fileIn = new FileInputStream(new File(str));
            gzipIn = new GZIPInputStream(fileIn);
            FileHelper.performActionOnEveryLine(gzipIn, new LineAction() {
                @Override
                public void performAction(String line, int lineNumber) {
                    // Progress output for large files.
                    if (lineNumber % 100000 == 0) {
                        System.out.println(lineNumber);
                    }
                    if (lineNumber > 1) {
                        String[] parts = line.split(TermCorpus.SEPARATOR);
                        if (parts.length != 2) {
                            return;
                        }
                        TermCorpus.this.setDf(parts[0], Integer.parseInt(parts[1]));
                        return;
                    }
                    if (line.startsWith("numDocs#")) {
                        String[] parts = line.split(TermCorpus.SEPARATOR);
                        TermCorpus.this.numDocs = Integer.parseInt(parts[1]);
                    }
                }
            });
        } finally {
            FileHelper.close(gzipIn, fileIn);
        }
    }

    /**
     * Writes the statistics to a gzip-compressed text file: a {@code numDocs#<n>} line, an empty
     * line, and then one {@code term#count} line per unique term.
     *
     * @param file the file to write; an existing file is overwritten
     * @throws IOException if the file cannot be opened or writing to it fails
     */
    public void save(File file) throws IOException {
        FileOutputStream fileOut = null;
        GZIPOutputStream gzipOut = null;
        PrintWriter writer = null;
        try {
            // Keep a handle on the raw stream: the GZIPOutputStream constructor writes the header
            // and may throw, in which case the file stream would otherwise never be closed.
            fileOut = new FileOutputStream(file);
            gzipOut = new GZIPOutputStream(fileOut);
            // NOTE(review): PrintWriter(OutputStream) encodes with the platform default charset;
            // confirm this matches the charset used when the file is read back in load().
            writer = new PrintWriter(gzipOut);
            writer.println("numDocs#" + getNumDocs());
            writer.println();
            for (String term : this.terms.uniqueItems()) {
                writer.println(term + SEPARATOR + this.terms.getCount(term));
            }
            // PrintWriter never throws on write failures; it only records them. Flush and check
            // explicitly so that an incomplete file is reported to the caller.
            if (writer.checkError()) {
                throw new IOException("Error while writing term corpus to " + file);
            }
            // Write the remaining compressed data and the gzip trailer now, while failures can
            // still surface as an IOException.
            gzipOut.finish();
        } finally {
            FileHelper.close(writer, gzipOut);
            FileHelper.close(fileOut);
        }
    }

    /** Sets the document count back to zero and clears the term counter. */
    public void reset() {
        this.numDocs = 0;
        this.terms.clear();
    }

    /**
     * @return a one-line summary with the document count and the counter's unique and total sizes
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("TermCorpus");
        sb.append(" numDocs=").append(getNumDocs());
        sb.append(" numUniqueTerms=").append(getNumUniqueTerms());
        sb.append(" numTerms=").append(getNumTerms());
        return sb.toString();
    }
}
