package ws.palladian.nodes.preprocessing.scraping;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataRow;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.DataType;
import org.knime.core.data.RowKey;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.xml.XMLValue;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
import org.knime.core.node.defaultnodesettings.SettingsModelString;
import org.knime.ext.textprocessing.data.DocumentBlobCell;
import org.knime.ext.textprocessing.data.DocumentBuilder;
import org.knime.ext.textprocessing.data.SectionAnnotation;
import org.knime.ext.textprocessing.util.DocumentDataTableBuilder;
import org.w3c.dom.Document;
import ws.palladian.extraction.content.PalladianContentExtractor;
import ws.palladian.extraction.content.ReadabilityContentExtractor;
import ws.palladian.extraction.content.WebPageContentExtractor;
import ws.palladian.nodes.helper.PalladianKnimeHelper;

/* loaded from: input_file:PalladianNodes.jar:ws/palladian/nodes/preprocessing/scraping/ContentExtractorNodeModel.class */
public class ContentExtractorNodeModel extends NodeModel {
    private static final NodeLogger logger;
    static final String CFGKEY_DOCUMENT_COLUMN_NAME = "settingDocumentColumn";
    static final String CFGKEY_EXTRACTOR_TYPE = "contentExtractorAlgorithm";
    static final String CONTENT_EXTRACTOR_READABILITY = "Readability";
    static final String CONTENT_EXTRACTOR_PALLADIAN = "Palladian";
    static final List<String> CONTENT_EXTRACTORS;
    static final String DEFAULT_EXTRACTOR_TYPE = "Readability";
    private final SettingsModelString settingExtractorType;
    private final SettingsModelString settingDocumentColumn;
    private DocumentDataTableBuilder dataTableBuilder;
    static final /* synthetic */ boolean $assertionsDisabled;

    static {
        $assertionsDisabled = !ContentExtractorNodeModel.class.desiredAssertionStatus();
        logger = NodeLogger.getLogger(ContentExtractorNodeModel.class);
        CONTENT_EXTRACTORS = Arrays.asList("Readability", CONTENT_EXTRACTOR_PALLADIAN);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    public ContentExtractorNodeModel() {
        super(1, 1);
        this.settingExtractorType = ContentExtractorNodeDialog.createSettingsExtractionAlgorithm();
        this.settingDocumentColumn = ContentExtractorNodeDialog.createSettingsDocumentColumn();
        this.dataTableBuilder = new DocumentDataTableBuilder();
    }

    protected BufferedDataTable[] execute(BufferedDataTable[] bufferedDataTableArr, ExecutionContext executionContext) throws Exception {
        BufferedDataContainer createDataContainer = executionContext.createDataContainer(this.dataTableBuilder.createDataTableSpec());
        WebPageContentExtractor createContentExtractor = createContentExtractor(this.settingExtractorType.getStringValue());
        int rowCount = bufferedDataTableArr[0].getRowCount();
        int i = 0;
        int i2 = 0;
        int findColumnIndex = bufferedDataTableArr[0].getSpec().findColumnIndex(this.settingDocumentColumn.getStringValue());
        if (!$assertionsDisabled && findColumnIndex <= -1) {
            throw new AssertionError();
        }
        Iterator it = bufferedDataTableArr[0].iterator();
        while (it.hasNext()) {
            Document document = ((DataRow) it.next()).getCell(findColumnIndex).getDocument();
            DocumentBuilder documentBuilder = new DocumentBuilder();
            int i3 = i2;
            i2++;
            RowKey createRowKey = RowKey.createRowKey(i3);
            createContentExtractor.setDocument(document);
            documentBuilder.addTitle(createContentExtractor.getResultTitle());
            documentBuilder.addSection(createContentExtractor.getResultText(), SectionAnnotation.UNKNOWN);
            createDataContainer.addRowToTable(new DefaultRow(createRowKey, new DataCell[]{new DocumentBlobCell(documentBuilder.createDocument())}));
            executionContext.checkCanceled();
            int i4 = i;
            i++;
            executionContext.setProgress(i4 / rowCount);
        }
        createDataContainer.close();
        return new BufferedDataTable[]{createDataContainer.getTable()};
    }

    private WebPageContentExtractor createContentExtractor(String str) {
        logger.debug("creating " + str);
        if ("Readability".equals(str)) {
            return new ReadabilityContentExtractor();
        }
        if (CONTENT_EXTRACTOR_PALLADIAN.equals(str)) {
            return new PalladianContentExtractor();
        }
        throw new IllegalStateException("Unknown content extractor: " + str);
    }

    protected void reset() {
    }

    protected DataTableSpec[] configure(DataTableSpec[] dataTableSpecArr) throws InvalidSettingsException {
        DataTableSpec dataTableSpec = dataTableSpecArr[0];
        if (PalladianKnimeHelper.getColumn(dataTableSpec, this.settingDocumentColumn.getStringValue(), XMLValue.class) == null) {
            String name = PalladianKnimeHelper.guessColumn(dataTableSpec, XMLValue.class).getName();
            setWarningMessage("Guessing input column: " + name);
            this.settingDocumentColumn.setStringValue(name);
        }
        return new DataTableSpec[]{this.dataTableBuilder.createDataTableSpec()};
    }

    protected void saveSettingsTo(NodeSettingsWO nodeSettingsWO) {
        this.settingExtractorType.saveSettingsTo(nodeSettingsWO);
        this.settingDocumentColumn.saveSettingsTo(nodeSettingsWO);
    }

    protected void loadValidatedSettingsFrom(NodeSettingsRO nodeSettingsRO) throws InvalidSettingsException {
        this.settingExtractorType.loadSettingsFrom(nodeSettingsRO);
        this.settingDocumentColumn.loadSettingsFrom(nodeSettingsRO);
    }

    protected void validateSettings(NodeSettingsRO nodeSettingsRO) throws InvalidSettingsException {
        this.settingExtractorType.validateSettings(nodeSettingsRO);
        this.settingDocumentColumn.validateSettings(nodeSettingsRO);
    }

    protected void loadInternals(File file, ExecutionMonitor executionMonitor) throws IOException, CanceledExecutionException {
    }

    protected void saveInternals(File file, ExecutionMonitor executionMonitor) throws IOException, CanceledExecutionException {
    }
}
