package ws.palladian.nodes.retrieval.parser;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataRow;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.DataType;
import org.knime.core.data.StringValue;
import org.knime.core.data.container.ColumnRearranger;
import org.knime.core.data.container.SingleCellFactory;
import org.knime.core.data.xml.XMLCell;
import org.knime.core.data.xml.XMLCellFactory;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
import org.knime.core.node.defaultnodesettings.SettingsModelBoolean;
import org.knime.core.node.defaultnodesettings.SettingsModelString;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import ws.palladian.helper.UrlHelper;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.nodes.helper.PalladianKnimeHelper;
import ws.palladian.nodes.retrieval.HttpResultValue;
import ws.palladian.retrieval.parser.DocumentParser;
import ws.palladian.retrieval.parser.ParserException;
import ws.palladian.retrieval.parser.ParserFactory;

/* loaded from: input_file:PalladianNodes.jar:ws/palladian/nodes/retrieval/parser/HtmlParserNodeModel.class */
/**
 * KNIME node model that parses the content of one input column with Palladian's HTML parser
 * and appends the serialized result as an XML cell in a new column named {@code "Document"}.
 *
 * <p>The input column must be compatible with {@link HttpResultValue} or {@link StringValue}.
 * Rows whose input cell is missing, cannot be parsed, or cannot be converted into an XML cell
 * yield a missing cell in the appended column; the latter two cases are logged as warnings.
 */
public class HtmlParserNodeModel extends NodeModel {
    private static final NodeLogger logger = NodeLogger.getLogger(HtmlParserNodeModel.class);

    /** Settings key under which the name of the input column is stored. */
    static final String CFGKEY_INPUT_COLUMN_NAME = "httpResultColumn";

    /** Settings key under which the "make URLs absolute" flag is stored. */
    static final String CFGKEY_MAKE_ABSOLUTE_URLS = "settingAbsoluteUrls";

    /** Default value of the "make URLs absolute" flag. */
    static final boolean DEFAULT_MAKE_ABSOLUTE_URLS = true;

    /** Name of the column that provides the content to parse. */
    private final SettingsModelString settingColumnName;

    /** Whether {@link UrlHelper#makeAbsoluteUrls} is applied to each parsed document. */
    private final SettingsModelBoolean settingAbsoluteUrls;

    /** Parser instance used for every row. */
    private final DocumentParser parser;

    /**
     * Creates the model with one input and one output port and obtains the settings models
     * from {@link HtmlParserNodeDialog}.
     */
    public HtmlParserNodeModel() {
        super(1, 1);
        this.settingColumnName = HtmlParserNodeDialog.createSettingsColumnName();
        this.settingAbsoluteUrls = HtmlParserNodeDialog.createSettingsAbsoluteUrls();
        this.parser = ParserFactory.createHtmlParser();
    }

    /**
     * Appends the parsed-document column to the first input table.
     *
     * @param bufferedDataTableArr input tables; only index 0 is used
     * @param executionContext context used to create the output table
     * @return a single-element array holding the input table plus the appended column
     * @throws InvalidSettingsException if the configured input column is not present in the
     *         input table
     * @throws Exception if creating the rearranged table fails
     */
    protected BufferedDataTable[] execute(BufferedDataTable[] bufferedDataTableArr, ExecutionContext executionContext) throws Exception {
        BufferedDataTable inputTable = bufferedDataTableArr[0];
        DataTableSpec inputSpec = inputTable.getSpec();
        String columnName = this.settingColumnName.getStringValue();
        int columnIndex = inputSpec.findColumnIndex(columnName);
        if (columnIndex < 0) {
            // Fail up front with a clear message instead of an index error on the first row.
            throw new InvalidSettingsException("Input column not found: " + columnName);
        }
        ColumnRearranger rearranger = createColumnRearranger(inputSpec, columnIndex, this.settingAbsoluteUrls.getBooleanValue());
        return new BufferedDataTable[]{executionContext.createColumnRearrangeTable(inputTable, rearranger, executionContext)};
    }

    /**
     * Builds a rearranger that keeps all columns of {@code dataTableSpec} and appends the
     * {@code "Document"} XML column.
     *
     * @param dataTableSpec spec of the table to extend
     * @param columnIndex index of the column whose cells are parsed
     * @param makeAbsoluteUrls whether URLs in each parsed document are made absolute
     * @return the rearranger with the appended cell factory
     */
    private ColumnRearranger createColumnRearranger(DataTableSpec dataTableSpec, final int columnIndex, final boolean makeAbsoluteUrls) {
        SingleCellFactory documentCellFactory = new SingleCellFactory(new DataColumnSpecCreator("Document", XMLCell.TYPE).createSpec()) {
            public DataCell getCell(DataRow dataRow) {
                DataCell inputCell = dataRow.getCell(columnIndex);
                if (inputCell.isMissing()) {
                    // A missing cell carries no value to cast or parse; propagate it unchanged.
                    return DataType.getMissingCell();
                }
                Document document;
                try {
                    document = parser.parse(getInputSource(inputCell));
                    if (makeAbsoluteUrls) {
                        UrlHelper.makeAbsoluteUrls(document);
                    }
                } catch (ParserException e) {
                    logger.warn("Error parsing document in row " + dataRow.getKey() + ": " + e.getMessage(), e);
                    return DataType.getMissingCell();
                }
                try {
                    return XMLCellFactory.create(HtmlHelper.xmlToString(document));
                } catch (Exception e) {
                    logger.warn("Error creating the result in row " + dataRow.getKey() + ": " + e.getMessage(), e);
                    return DataType.getMissingCell();
                }
            }
        };
        ColumnRearranger columnRearranger = new ColumnRearranger(dataTableSpec);
        columnRearranger.append(documentCellFactory);
        return columnRearranger;
    }

    /**
     * Wraps the value of a cell in an {@link InputSource} for the parser.
     *
     * <p>For {@link HttpResultValue} cells the source is a byte stream over the result's content.
     * For {@link StringValue} cells the string is passed to {@link InputSource#InputSource(String)},
     * which interprets it as a system identifier, not as markup.
     * NOTE(review): confirm that string columns are meant to hold locations rather than HTML text.
     *
     * @param dataCell a non-missing cell of a supported type
     * @return the input source for the cell
     * @throws IllegalStateException if the cell type is compatible with neither supported value
     */
    public InputSource getInputSource(DataCell dataCell) {
        DataType type = dataCell.getType();
        if (type.isCompatible(HttpResultValue.class)) {
            return new InputSource(new ByteArrayInputStream(((HttpResultValue) dataCell).getHttpResult().getContent()));
        }
        if (type.isCompatible(StringValue.class)) {
            return new InputSource(((StringValue) dataCell).getStringValue());
        }
        throw new IllegalStateException("Unsupported type: " + type);
    }

    /** Nothing to reset; the model keeps no state between executions. */
    protected void reset() {
    }

    /**
     * Validates the configured input column and computes the output spec.
     *
     * <p>If the configured column is not found among the compatible columns, a column is guessed
     * via {@link PalladianKnimeHelper#guessColumn}, stored in the settings, and reported through
     * a warning message.
     *
     * @param dataTableSpecArr input specs; only index 0 is used
     * @return a single-element array with the input spec plus the appended column
     * @throws InvalidSettingsException if no input column could be determined
     */
    protected DataTableSpec[] configure(DataTableSpec[] dataTableSpecArr) throws InvalidSettingsException {
        DataTableSpec dataTableSpec = dataTableSpecArr[0];
        if (PalladianKnimeHelper.getColumn(dataTableSpec, this.settingColumnName.getStringValue(), HttpResultValue.class, StringValue.class) == null) {
            // Defensive: a null guess is reported as a settings problem rather than an NPE.
            String name = requireFound(
                    PalladianKnimeHelper.guessColumn(dataTableSpec, HttpResultValue.class, StringValue.class),
                    "No column compatible with HTTP results or strings found in the input table.").getName();
            setWarningMessage("Guessing input column: " + name);
            this.settingColumnName.setStringValue(name);
        }
        // Only the resulting spec is needed here, so the column index and URL flag are placeholders.
        return new DataTableSpec[]{createColumnRearranger(dataTableSpec, 0, false).createSpec()};
    }

    /**
     * Returns {@code value} unless it is {@code null}.
     *
     * @param value the value to check
     * @param message message of the exception thrown for a {@code null} value
     * @return {@code value}, never {@code null}
     * @throws InvalidSettingsException if {@code value} is {@code null}
     */
    private static <T> T requireFound(T value, String message) throws InvalidSettingsException {
        if (value == null) {
            throw new InvalidSettingsException(message);
        }
        return value;
    }

    /** Writes both settings models to {@code nodeSettingsWO}. */
    protected void saveSettingsTo(NodeSettingsWO nodeSettingsWO) {
        this.settingColumnName.saveSettingsTo(nodeSettingsWO);
        this.settingAbsoluteUrls.saveSettingsTo(nodeSettingsWO);
    }

    /**
     * Loads both settings models from {@code nodeSettingsRO}.
     *
     * @throws InvalidSettingsException if either model cannot be loaded
     */
    protected void loadValidatedSettingsFrom(NodeSettingsRO nodeSettingsRO) throws InvalidSettingsException {
        this.settingColumnName.loadSettingsFrom(nodeSettingsRO);
        this.settingAbsoluteUrls.loadSettingsFrom(nodeSettingsRO);
    }

    /**
     * Validates both settings models against {@code nodeSettingsRO}.
     *
     * @throws InvalidSettingsException if either model rejects the settings
     */
    protected void validateSettings(NodeSettingsRO nodeSettingsRO) throws InvalidSettingsException {
        this.settingColumnName.validateSettings(nodeSettingsRO);
        this.settingAbsoluteUrls.validateSettings(nodeSettingsRO);
    }

    /** No internals to load. */
    protected void loadInternals(File file, ExecutionMonitor executionMonitor) throws IOException, CanceledExecutionException {
    }

    /** No internals to save. */
    protected void saveInternals(File file, ExecutionMonitor executionMonitor) throws IOException, CanceledExecutionException {
    }
}
