package ws.palladian.extraction.keyphrase.evaluation;

import edu.stanford.nlp.ling.CoreLabel;
import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/keyphrase/evaluation/DeliciousT140Handler.class */
/**
 * SAX handler that turns a tag-annotated document listing into a flat index file.
 *
 * <p>While parsing, the handler collects the text of the {@code filename}, {@code filetype},
 * {@code users}, {@code name} and {@code weight} elements. Each closing {@code tag} element
 * contributes its name to the current document when the tag is frequent enough and matches
 * {@link #TAG_MATCH_PATTERN}. Each closing {@code document} element writes one line to the
 * index file, provided the document passes the filter in {@link #writeEntry()}.</p>
 *
 * <p>An index line has the form {@code <prefix><separator><filename>#tag1#tag2...}, where
 * {@code prefix} is the first two characters of the filename.</p>
 *
 * <p>Instances hold parsing state and are meant for a single parse run.</p>
 */
final class DeliciousT140Handler extends DefaultHandler {
    private static final String TAG_TAG = "tag";
    private static final String TAG_NAME = "name";
    private static final String TAG_WEIGHT = "weight";
    private static final String TAG_USERS = "users";
    private static final String TAG_FILETYPE = "filetype";
    private static final String TAG_FILENAME = "filename";
    private static final String TAG_DOCUMENT = "document";

    /** Elements whose character content is captured. */
    private static final List<String> TAGS = Arrays.asList(TAG_WEIGHT, TAG_NAME, TAG_USERS, TAG_FILETYPE, TAG_FILENAME);

    /** Only tags consisting entirely of these characters are accepted. */
    private static final Pattern TAG_MATCH_PATTERN = Pattern.compile("[a-z0-9\\-\\.\\+\\#]+");

    /** File type a document must have to be written to the index. */
    private static final String ACCEPTED_FILETYPE = "html";

    /** Character separating the fields of an index line. */
    private static final char FIELD_SEPARATOR = '#';

    /** Progress is printed to standard output every this many written entries. */
    private static final int PROGRESS_INTERVAL = 100;

    private final int minimumUsers;
    private final float minimumUserTagRatio;
    private final File indexFileOutput;

    /** Accumulates character data of the element currently being captured. */
    private final StringBuilder textBuffer = new StringBuilder();

    /** Accepted tags of the document currently being parsed. */
    private final Set<String> tags = new HashSet<String>();

    private boolean catchText = false;
    private int entriesWritten;
    private String filename;
    private String filetype;
    private int users;
    private String currentTag;
    private int currentWeight;

    /**
     * Creates a handler writing to the given index file. An already existing file is deleted
     * first, so that the entries of this run do not end up behind those of an earlier run.
     *
     * @param indexFileOutput file the index lines are written to
     * @param minimumUsers minimum value of the {@code users} element for a document to be written
     * @param minimumUserTagRatio minimum ratio {@code weight / users} for a tag to be accepted
     */
    public DeliciousT140Handler(File indexFileOutput, int minimumUsers, float minimumUserTagRatio) {
        if (indexFileOutput.exists()) {
            indexFileOutput.delete();
        }
        this.indexFileOutput = indexFileOutput;
        this.minimumUsers = minimumUsers;
        this.minimumUserTagRatio = minimumUserTagRatio;
    }

    /**
     * Starts capturing character data when one of the elements in {@link #TAGS} opens.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (TAGS.contains(qName)) {
            this.catchText = true;
        }
    }

    /**
     * Stores the captured text of the closing element, or finishes a tag or a document.
     *
     * @throws NumberFormatException if the content of {@code users} or {@code weight} is not
     *         an integer (surrounding whitespace is ignored)
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        this.catchText = false;
        if (TAG_FILENAME.equals(qName)) {
            this.filename = getText();
        } else if (TAG_FILETYPE.equals(qName)) {
            this.filetype = getText();
        } else if (TAG_USERS.equals(qName)) {
            // Trim so that indented or line-wrapped element content still parses.
            this.users = Integer.parseInt(getText().trim());
        } else if (TAG_NAME.equals(qName)) {
            this.currentTag = getText();
        } else if (TAG_WEIGHT.equals(qName)) {
            this.currentWeight = Integer.parseInt(getText().trim());
        } else if (TAG_TAG.equals(qName)) {
            if (isCurrentTagAccepted()) {
                this.tags.add(this.currentTag);
            }
        } else if (TAG_DOCUMENT.equals(qName)) {
            writeEntry();
            this.tags.clear();
        }
    }

    /**
     * Appends character data to the buffer while capturing is switched on.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.catchText) {
            this.textBuffer.append(ch, start, length);
        }
    }

    /**
     * Tells whether the tag read last is used often enough and consists of allowed characters.
     */
    private boolean isCurrentTagAccepted() {
        if (this.currentTag == null) {
            // A tag element closed before any name was read; there is nothing to match.
            return false;
        }
        // Float division: zero users yields Infinity or NaN instead of an exception,
        // and NaN never satisfies the comparison.
        float userTagRatio = (float) this.currentWeight / (float) this.users;
        return userTagRatio >= this.minimumUserTagRatio && TAG_MATCH_PATTERN.matcher(this.currentTag).matches();
    }

    /**
     * Writes the index line of the current document, if the document is an HTML document with
     * at least {@code minimumUsers} users and at least one accepted tag.
     *
     * <p>For a document that passes the filter, the filename must have at least two characters;
     * otherwise {@link String#substring(int, int)} throws.</p>
     */
    private void writeEntry() throws SAXException {
        boolean accepted = ACCEPTED_FILETYPE.equals(this.filetype)
                && this.users >= this.minimumUsers
                && !this.tags.isEmpty();
        if (!accepted) {
            return;
        }

        // The path is only built for accepted documents, so documents that are filtered out
        // cannot fail on a missing or too short filename.
        // NOTE(review): CoreLabel.TAG_SEPARATOR looks like a decompiler substitution for an
        // inlined string literal (presumably a path separator) - confirm against the sources.
        String path = this.filename.substring(0, 2) + CoreLabel.TAG_SEPARATOR + this.filename;

        StringBuilder line = new StringBuilder();
        line.append(path).append(FIELD_SEPARATOR);
        line.append(StringUtils.join(this.tags, FIELD_SEPARATOR));
        line.append('\n');
        FileHelper.appendFile(this.indexFileOutput.getAbsolutePath(), line);

        this.entriesWritten++;
        if (this.entriesWritten % PROGRESS_INTERVAL == 0) {
            System.out.println(this.entriesWritten);
        }
    }

    /**
     * Returns the text captured so far and empties the buffer for the next element.
     */
    private String getText() {
        String text = this.textBuffer.toString();
        this.textBuffer.setLength(0);
        return text;
    }
}
