package ws.palladian.retrieval.parser;

import java.io.IOException;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.HTMLConfiguration;
import org.cyberneko.html.HTMLTagBalancerFixed;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

/**
 * {@link DocumentParser} implementation that builds a DOM {@link Document} from HTML input using
 * a Xerces {@link DOMParser} driven by a NekoHTML {@link HTMLConfiguration}.
 *
 * <p>NekoHTML's built-in tag balancing is switched off; instead a custom filter chain consisting of
 * {@link HTMLTagBalancerFixed} followed by {@code PreflightFilter} is installed.
 *
 * @deprecated This class is flagged as deprecated; the intended replacement is not visible from
 *             this file.
 */
@Deprecated
public final class NekoHtmlParser extends BaseDocumentParser implements DocumentParser {

    /** NekoHTML property: how element names are reported. */
    private static final String PROPERTY_ELEMENT_NAMES = "http://cyberneko.org/html/properties/names/elems";

    /** NekoHTML feature: whether namespaces are inserted. */
    private static final String FEATURE_INSERT_NAMESPACES = "http://cyberneko.org/html/features/insert-namespaces";

    /** NekoHTML feature: whether the built-in tag balancer is active. */
    private static final String FEATURE_BALANCE_TAGS = "http://cyberneko.org/html/features/balance-tags";

    /** NekoHTML property: additional document filters appended to the pipeline. */
    private static final String PROPERTY_FILTERS = "http://cyberneko.org/html/properties/filters";

    /** NekoHTML property: encoding used when none is specified by the input. */
    private static final String PROPERTY_DEFAULT_ENCODING = "http://cyberneko.org/html/properties/default-encoding";

    /**
     * Parses the given input into a DOM document.
     *
     * @param inputSource the source to read the HTML from
     * @return the document produced by the underlying parser
     * @throws ParserException if configuring the parser or parsing the input fails for any reason;
     *         the original failure is attached as the cause
     */
    @Override // ws.palladian.retrieval.parser.DocumentParser
    public Document parse(InputSource inputSource) throws ParserException {
        // Created outside the try block on purpose: construction failures are not wrapped.
        DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
        try {
            applyNekoSettings(htmlParser);
            htmlParser.parse(inputSource);
            return htmlParser.getDocument();
        } catch (IOException | SAXException expected) {
            // Covers SAXNotRecognizedException and SAXNotSupportedException as well,
            // since both are subclasses of SAXException.
            throw new ParserException(expected);
        } catch (Throwable unexpected) {
            // Anything else raised while parsing is reported as a ParserException too.
            throw new ParserException(unexpected);
        }
    }

    /**
     * Applies the NekoHTML properties and features used by this parser, in a fixed order.
     *
     * @param htmlParser the parser to configure
     * @throws SAXNotRecognizedException if a property or feature identifier is not recognized
     * @throws SAXNotSupportedException if a property or feature value is not supported
     */
    private static void applyNekoSettings(DOMParser htmlParser)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        htmlParser.setProperty(PROPERTY_ELEMENT_NAMES, "lower");
        htmlParser.setFeature(FEATURE_INSERT_NAMESPACES, true);
        // The built-in balancer is disabled; HTMLTagBalancerFixed in the filter chain is used instead.
        htmlParser.setFeature(FEATURE_BALANCE_TAGS, false);
        XMLDocumentFilter[] filterChain = {new HTMLTagBalancerFixed(), new PreflightFilter()};
        htmlParser.setProperty(PROPERTY_FILTERS, filterChain);
        htmlParser.setProperty(PROPERTY_DEFAULT_ENCODING, "UTF-8");
    }
}
