package com.aliasi.crf;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunker;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.ConfidenceChunker;
import com.aliasi.chunk.NBestChunker;
import com.aliasi.chunk.TagChunkCodec;
import com.aliasi.corpus.Corpus;
import com.aliasi.corpus.ObjectHandler;
import com.aliasi.io.Reporter;
import com.aliasi.io.Reporters;
import com.aliasi.stats.AnnealingSchedule;
import com.aliasi.stats.RegressionPrior;
import com.aliasi.tag.ScoredTagging;
import com.aliasi.tag.StringTagging;
import com.aliasi.tag.Tagging;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.ScoredObject;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import ws.palladian.helper.io.FileHelper;

/* loaded from: input_file:lib/palladian.jar:com/aliasi/crf/ChainCrfChunker.class */
public class ChainCrfChunker implements Chunker, ConfidenceChunker, NBestChunker, Serializable {
    // Serialization version identifier for this class.
    static final long serialVersionUID = -2244399751558084581L;
    // Underlying chain CRF over string tokens; every tagging call in this class goes through it.
    private final ChainCrf<String> mCrf;
    // Factory producing the tokenizer that splits the input character slice into tokens.
    private final TokenizerFactory mTokenizerFactory;
    // Codec used to convert between taggings and chunkings.
    private final TagChunkCodec mCodec;
    // Not referenced anywhere else in this file.
    // NOTE(review): presumably corresponds to the literal `false` passed to ChainCrf.estimate
    // from estimate() — confirm against the ChainCrf.estimate signature before relying on it.
    static final boolean ALLOW_UNSEEN_TAG_TRANSITIONS = false;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:lib/palladian.jar:com/aliasi/crf/ChainCrfChunker$ChunkingAdapter.class */
    /**
     * Adapts a handler of string taggings so that it can consume chunkings.
     * Every chunking received is converted to a tagging with the supplied
     * codec and then forwarded to the wrapped handler.
     */
    public static class ChunkingAdapter implements ObjectHandler<Chunking> {
        private final ObjectHandler<Tagging<String>> mTagHandler;
        private final TagChunkCodec mCodec;

        /**
         * Builds an adapter around a tagging handler.
         *
         * @param objectHandler handler that receives the converted taggings
         * @param tagChunkCodec codec used to turn each chunking into a tagging
         */
        public ChunkingAdapter(ObjectHandler<Tagging<String>> objectHandler, TagChunkCodec tagChunkCodec) {
            mCodec = tagChunkCodec;
            mTagHandler = objectHandler;
        }

        /**
         * Converts the chunking to a tagging and hands it to the wrapped handler.
         *
         * @param chunking chunking to convert and forward
         */
        @Override // com.aliasi.corpus.ObjectHandler
        public void handle(Chunking chunking) {
            Tagging<String> tagging = mCodec.toTagging(chunking);
            mTagHandler.handle(tagging);
        }
    }

    /* loaded from: input_file:lib/palladian.jar:com/aliasi/crf/ChainCrfChunker$IteratorWrapper.class */
    /**
     * Iterator view that turns scored taggings into scored chunkings.
     * Each element pulled from the underlying iterator is converted with
     * {@link ChainCrfChunker#toChunking} and re-wrapped with its original score.
     */
    static class IteratorWrapper implements Iterator<ScoredObject<Chunking>> {
        private final Iterator<ScoredTagging<String>> mIt;
        private final PreTagging mPreTagging;
        private final char[] mCs;
        private final int mStart;
        private final int mEnd;
        private final TagChunkCodec mCodec;

        /**
         * @param it            source of scored taggings
         * @param preTagging    tokens and token positions the taggings refer to
         * @param cArr          characters that were tagged
         * @param i             start offset of the tagged slice in {@code cArr}
         * @param i2            end offset of the tagged slice in {@code cArr}
         * @param tagChunkCodec codec used for the tagging-to-chunking conversion
         */
        IteratorWrapper(Iterator<ScoredTagging<String>> it, PreTagging preTagging, char[] cArr, int i, int i2, TagChunkCodec tagChunkCodec) {
            mIt = it;
            mPreTagging = preTagging;
            mCodec = tagChunkCodec;
            mCs = cArr;
            mStart = i;
            mEnd = i2;
        }

        /** Reports whether the underlying tagging iterator has more elements. */
        @Override // java.util.Iterator
        public boolean hasNext() {
            return mIt.hasNext();
        }

        /** Delegates removal to the underlying tagging iterator. */
        @Override // java.util.Iterator
        public void remove() {
            mIt.remove();
        }

        /**
         * Pulls the next scored tagging and returns the equivalent chunking
         * paired with the tagging's score.
         */
        /* JADX WARN: Can't rename method to resolve collision */
        @Override // java.util.Iterator
        public ScoredObject<Chunking> next() {
            ScoredTagging<String> scoredTagging = mIt.next();
            Chunking chunking = ChainCrfChunker.toChunking(scoredTagging, mPreTagging, mCs, mStart, mEnd, mCodec);
            return new ScoredObject<>(chunking, scoredTagging.score());
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:lib/palladian.jar:com/aliasi/crf/ChainCrfChunker$PreTagging.class */
    /**
     * Immutable holder for the result of tokenizing an input slice: the token
     * strings plus two parallel arrays with each token's start and end position.
     * The arrays and list are stored as given (no defensive copies).
     */
    public static class PreTagging {
        /** Tokens in the order they were produced. */
        final List<String> mTokens;
        /** Start position of each token, parallel to {@link #mTokens}. */
        final int[] mTokenStarts;
        /** End position of each token, parallel to {@link #mTokens}. */
        final int[] mTokenEnds;

        /**
         * @param list  token strings
         * @param iArr  token start positions
         * @param iArr2 token end positions
         */
        public PreTagging(List<String> list, int[] iArr, int[] iArr2) {
            mTokenEnds = iArr2;
            mTokenStarts = iArr;
            mTokens = list;
        }
    }

    /* loaded from: input_file:lib/palladian.jar:com/aliasi/crf/ChainCrfChunker$Serializer.class */
    /**
     * Serialization proxy for {@link ChainCrfChunker}. It writes the chunker's
     * CRF, tokenizer factory and codec (in that order) and reconstructs a
     * chunker from those three objects when read back.
     */
    static class Serializer extends AbstractExternalizable {
        static final long serialVersionUID = 2460314741682974199L;
        private final ChainCrfChunker mChunker;

        /**
         * Creates a proxy with no chunker attached. Calling
         * {@link #writeExternal(ObjectOutput)} on such an instance throws a
         * {@link NullPointerException}; only {@link #read(ObjectInput)} is usable.
         * NOTE(review): presumably this exists for the public no-argument
         * constructor that externalizable classes need on deserialization.
         */
        public Serializer() {
            this(null);
        }

        /**
         * @param chainCrfChunker chunker whose components will be written
         */
        public Serializer(ChainCrfChunker chainCrfChunker) {
            this.mChunker = chainCrfChunker;
        }

        /**
         * Reads a CRF, a tokenizer factory and a codec from the input, in the
         * same order {@link #writeExternal(ObjectOutput)} writes them, and
         * builds a new chunker from them.
         *
         * @param objectInput stream positioned at the serialized components
         * @return the reconstructed {@link ChainCrfChunker}
         * @throws IOException            if reading from the stream fails
         * @throws ClassNotFoundException if a serialized class cannot be resolved
         * @throws ClassCastException     if the stream does not hold the expected types
         */
        @Override // com.aliasi.util.AbstractExternalizable
        public Object read(ObjectInput objectInput) throws IOException, ClassNotFoundException {
            // writeExternal always writes the chunker's ChainCrf<String> field first, so the
            // parameterized cast is safe for streams produced by this class.
            @SuppressWarnings("unchecked")
            ChainCrf<String> crf = (ChainCrf<String>) objectInput.readObject();
            TokenizerFactory tokenizerFactory = (TokenizerFactory) objectInput.readObject();
            TagChunkCodec codec = (TagChunkCodec) objectInput.readObject();
            return new ChainCrfChunker(crf, tokenizerFactory, codec);
        }

        /**
         * Writes the wrapped chunker's CRF, tokenizer factory and codec.
         *
         * @param objectOutput destination stream
         * @throws IOException if writing to the stream fails
         */
        @Override // com.aliasi.util.AbstractExternalizable, java.io.Externalizable
        public void writeExternal(ObjectOutput objectOutput) throws IOException {
            objectOutput.writeObject(this.mChunker.mCrf);
            objectOutput.writeObject(this.mChunker.mTokenizerFactory);
            objectOutput.writeObject(this.mChunker.mCodec);
        }
    }

    /* loaded from: input_file:lib/palladian.jar:com/aliasi/crf/ChainCrfChunker$TagCorpus.class */
    /**
     * Presents a corpus of chunkings as a corpus of string taggings. Visiting
     * this corpus visits the wrapped chunking corpus with a
     * {@link ChunkingAdapter} placed in front of the caller's tagging handler.
     */
    static class TagCorpus extends Corpus<ObjectHandler<Tagging<String>>> {
        private final Corpus<ObjectHandler<Chunking>> mChunkingCorpus;
        private final TagChunkCodec mCodec;

        /**
         * @param corpus        chunking corpus to wrap
         * @param tagChunkCodec codec used to convert each chunking to a tagging
         */
        public TagCorpus(Corpus<ObjectHandler<Chunking>> corpus, TagChunkCodec tagChunkCodec) {
            mCodec = tagChunkCodec;
            mChunkingCorpus = corpus;
        }

        /**
         * Visits the training section of the wrapped corpus, converting each
         * chunking to a tagging before it reaches the handler.
         *
         * @throws IOException if the wrapped corpus fails while visiting
         */
        @Override // com.aliasi.corpus.Corpus
        public void visitTrain(ObjectHandler<Tagging<String>> objectHandler) throws IOException {
            mChunkingCorpus.visitTrain(adapt(objectHandler));
        }

        /**
         * Visits the test section of the wrapped corpus, converting each
         * chunking to a tagging before it reaches the handler.
         *
         * @throws IOException if the wrapped corpus fails while visiting
         */
        @Override // com.aliasi.corpus.Corpus
        public void visitTest(ObjectHandler<Tagging<String>> objectHandler) throws IOException {
            mChunkingCorpus.visitTest(adapt(objectHandler));
        }

        /** Wraps a tagging handler so it can be driven by chunkings. */
        private ChunkingAdapter adapt(ObjectHandler<Tagging<String>> tagHandler) {
            return new ChunkingAdapter(tagHandler, mCodec);
        }
    }

    /**
     * Builds a chunker from its three components. The arguments are stored
     * as given; no validation or copying is performed.
     *
     * @param chainCrf         CRF used to tag token sequences
     * @param tokenizerFactory factory for the tokenizer applied to the input
     * @param tagChunkCodec    codec converting between taggings and chunkings
     */
    public ChainCrfChunker(ChainCrf<String> chainCrf, TokenizerFactory tokenizerFactory, TagChunkCodec tagChunkCodec) {
        mCodec = tagChunkCodec;
        mTokenizerFactory = tokenizerFactory;
        mCrf = chainCrf;
    }

    /**
     * Returns the chain CRF this chunker delegates tagging to.
     *
     * @return the CRF supplied at construction
     */
    public ChainCrf<String> crf() {
        return mCrf;
    }

    /**
     * Returns the codec this chunker uses to convert between taggings and chunkings.
     *
     * @return the codec supplied at construction
     */
    public TagChunkCodec codec() {
        return mCodec;
    }

    /**
     * Returns the factory that creates the tokenizer applied to input text.
     *
     * @return the tokenizer factory supplied at construction
     */
    public TokenizerFactory tokenizerFactory() {
        return mTokenizerFactory;
    }

    /**
     * Returns a multi-line description of this chunker: the codec, the
     * tokenizer factory, and the string form of the underlying CRF, each
     * introduced by a label.
     *
     * @return human-readable description of the chunker's components
     */
    @Override
    public String toString() {
        // NOTE(review): FileHelper.NEWLINE_CHARACTER comes from an unrelated library and sits
        // next to a plain "\n" literal below — presumably a decompiler substitution for "\n";
        // confirm its value before replacing it.
        return new StringBuilder()
            .append("TagChunkCodec=").append(codec())
            .append(FileHelper.NEWLINE_CHARACTER)
            .append("Tokenizer Factory=").append(tokenizerFactory())
            .append(FileHelper.NEWLINE_CHARACTER)
            .append("CRF=\n")
            .append(crf().toString())
            .toString();
    }

    /**
     * Chunks an entire character sequence by copying it to a char array and
     * delegating to {@link #chunk(char[], int, int)} over the full range.
     *
     * @param charSequence text to chunk
     * @return the chunking produced for the whole sequence
     */
    @Override // com.aliasi.chunk.Chunker
    public Chunking chunk(CharSequence charSequence) {
        final char[] chars = Strings.toCharArray(charSequence);
        final int end = chars.length;
        return chunk(chars, 0, end);
    }

    /**
     * Chunks a slice of a character array: tokenizes the slice, tags the
     * tokens with the CRF, and converts the resulting tagging to a chunking.
     *
     * @param cArr underlying characters
     * @param i    start offset of the slice
     * @param i2   end offset of the slice (slice length is {@code i2 - i})
     * @return the chunking derived from the CRF's tagging of the slice
     */
    @Override // com.aliasi.chunk.Chunker
    public Chunking chunk(char[] cArr, int i, int i2) {
        PreTagging preTagging = preTag(cArr, i, i2);
        Tagging<String> tagging = mCrf.tag(preTagging.mTokens);
        return toChunking(tagging, preTagging, cArr, i, i2, mCodec);
    }

    /**
     * Returns an iterator over scored chunkings of a character slice, backed
     * by the CRF's n-best taggings of the slice's tokens.
     *
     * @param cArr underlying characters
     * @param i    start offset of the slice
     * @param i2   end offset of the slice (slice length is {@code i2 - i})
     * @param i3   passed through to the CRF's {@code tagNBest} call
     *             (presumably the maximum number of results — confirm)
     * @return iterator converting each scored tagging to a scored chunking
     */
    @Override // com.aliasi.chunk.NBestChunker
    public Iterator<ScoredObject<Chunking>> nBest(char[] cArr, int i, int i2, int i3) {
        PreTagging preTagging = preTag(cArr, i, i2);
        Iterator<ScoredTagging<String>> taggings = mCrf.tagNBest(preTagging.mTokens, i3);
        return new IteratorWrapper(taggings, preTagging, cArr, i, i2, mCodec);
    }

    /**
     * Returns an iterator over scored chunkings of a character slice, backed
     * by the CRF's {@code tagNBestConditional} taggings of the slice's tokens.
     *
     * @param cArr underlying characters
     * @param i    start offset of the slice
     * @param i2   end offset of the slice (slice length is {@code i2 - i})
     * @param i3   passed through to the CRF's {@code tagNBestConditional} call
     *             (presumably the maximum number of results — confirm)
     * @return iterator converting each scored tagging to a scored chunking
     */
    public Iterator<ScoredObject<Chunking>> nBestConditional(char[] cArr, int i, int i2, int i3) {
        PreTagging preTagging = preTag(cArr, i, i2);
        Iterator<ScoredTagging<String>> taggings = mCrf.tagNBestConditional(preTagging.mTokens, i3);
        return new IteratorWrapper(taggings, preTagging, cArr, i, i2, mCodec);
    }

    /**
     * Returns an iterator over individual chunks for a character slice. The
     * slice is tokenized, the CRF's {@code tagMarginal} result for the tokens
     * is computed, and the codec derives the chunks from that result together
     * with the token start and end positions.
     *
     * @param cArr underlying characters
     * @param i    start offset of the slice
     * @param i2   end offset of the slice (slice length is {@code i2 - i})
     * @param i3   passed through to the codec's {@code nBestChunks} call
     *             (presumably the maximum number of chunks — confirm)
     * @return iterator over the chunks produced by the codec
     */
    @Override // com.aliasi.chunk.ConfidenceChunker
    public Iterator<Chunk> nBestChunks(char[] cArr, int i, int i2, int i3) {
        PreTagging preTagging = preTag(cArr, i, i2);
        int[] tokenStarts = preTagging.mTokenStarts;
        int[] tokenEnds = preTagging.mTokenEnds;
        return mCodec.nBestChunks(mCrf.tagMarginal(preTagging.mTokens), tokenStarts, tokenEnds, i3);
    }

    /**
     * Tokenizes a slice of a character array and records, for every token,
     * its text and the start/end positions reported by the tokenizer.
     *
     * @param cArr underlying characters
     * @param i    start offset of the slice
     * @param i2   end offset of the slice; the tokenizer is created over
     *             {@code i2 - i} characters starting at {@code i}
     * @return the tokens with parallel arrays of their start and end positions
     */
    PreTagging preTag(char[] cArr, int i, int i2) {
        List<String> tokens = new ArrayList<>();
        List<Integer> tokenStarts = new ArrayList<>();
        List<Integer> tokenEnds = new ArrayList<>();
        Tokenizer tokenizer = this.mTokenizerFactory.tokenizer(cArr, i, i2 - i);
        String token;
        // nextToken() returning null marks the end of the token stream.
        while ((token = tokenizer.nextToken()) != null) {
            tokens.add(token);
            // Positions must be read right after nextToken(): they describe the last token returned.
            tokenStarts.add(tokenizer.lastTokenStartPosition());
            tokenEnds.add(tokenizer.lastTokenEndPosition());
        }
        return new PreTagging(tokens, toArray(tokenStarts), toArray(tokenEnds));
    }

    /**
     * Serialization hook: substitutes a {@link Serializer} proxy wrapping this
     * chunker for the chunker itself.
     *
     * @return a new proxy holding a reference to this chunker
     */
    Object writeReplace() {
        Serializer proxy = new Serializer(this);
        return proxy;
    }

    /**
     * Trains a chunker from a corpus of chunkings. The corpus is wrapped in a
     * {@link TagCorpus} so the CRF trainer sees taggings, {@code ChainCrf.estimate}
     * is run on it, and the resulting CRF is combined with the given tokenizer
     * factory and codec.
     *
     * <p>All arguments from {@code chainCrfFeatureExtractor} through {@code i4} are
     * forwarded unchanged, in order, to {@code ChainCrf.estimate}; consult that
     * method for their meaning.
     *
     * @param corpus                   chunking corpus to train on
     * @param tagChunkCodec            codec converting chunkings to taggings; also used by the result
     * @param tokenizerFactory         tokenizer factory for the resulting chunker
     * @param chainCrfFeatureExtractor forwarded to {@code ChainCrf.estimate}
     * @param z                        forwarded to {@code ChainCrf.estimate}
     * @param i                        forwarded to {@code ChainCrf.estimate}
     * @param z2                       forwarded to {@code ChainCrf.estimate}
     * @param regressionPrior          forwarded to {@code ChainCrf.estimate}
     * @param i2                       forwarded to {@code ChainCrf.estimate}
     * @param annealingSchedule        forwarded to {@code ChainCrf.estimate}
     * @param d                        forwarded to {@code ChainCrf.estimate}
     * @param i3                       forwarded to {@code ChainCrf.estimate}
     * @param i4                       forwarded to {@code ChainCrf.estimate}
     * @param reporter                 progress reporter; {@code null} selects a silent reporter
     * @return a chunker backed by the newly estimated CRF
     * @throws IOException declared by the signature (propagated from training)
     */
    public static ChainCrfChunker estimate(Corpus<ObjectHandler<Chunking>> corpus, TagChunkCodec tagChunkCodec, TokenizerFactory tokenizerFactory, ChainCrfFeatureExtractor<String> chainCrfFeatureExtractor, boolean z, int i, boolean z2, RegressionPrior regressionPrior, int i2, AnnealingSchedule annealingSchedule, double d, int i3, int i4, Reporter reporter) throws IOException {
        Reporter activeReporter = (reporter != null) ? reporter : Reporters.silent();
        activeReporter.info("Training chain CRF chunker");
        activeReporter.info("Converting chunk corpus to tag corpus using codec.");

        TagCorpus tagCorpus = new TagCorpus(corpus, tagChunkCodec);
        // NOTE(review): the literal false is the sixth argument of ChainCrf.estimate; it
        // presumably mirrors ALLOW_UNSEEN_TAG_TRANSITIONS — confirm against that signature.
        ChainCrf<String> crf = ChainCrf.estimate(
            tagCorpus, chainCrfFeatureExtractor, z, i, z2, false,
            regressionPrior, i2, annealingSchedule, d, i3, i4, activeReporter);
        return new ChainCrfChunker(crf, tokenizerFactory, tagChunkCodec);
    }

    /**
     * Converts a tagging of pre-tokenized input into a chunking. A
     * {@code StringTagging} is assembled from the pre-tagging's tokens and
     * positions, the tagging's tags and the text of the slice, then passed
     * through the codec.
     *
     * @param tagging       tagging whose tags are used
     * @param preTagging    tokens and token positions the tags refer to
     * @param cArr          underlying characters
     * @param i             start offset of the slice
     * @param i2            end offset of the slice ({@code i2 - i} characters are used)
     * @param tagChunkCodec codec performing the tagging-to-chunking conversion
     * @return the chunking produced by the codec
     */
    static Chunking toChunking(Tagging<String> tagging, PreTagging preTagging, char[] cArr, int i, int i2, TagChunkCodec tagChunkCodec) {
        String text = new String(cArr, i, i2 - i);
        StringTagging stringTagging = new StringTagging(
            preTagging.mTokens, tagging.tags(), text, preTagging.mTokenStarts, preTagging.mTokenEnds);
        return tagChunkCodec.toChunking(stringTagging);
    }

    /**
     * Copies a list of boxed integers into a primitive {@code int} array,
     * preserving order.
     *
     * @param list values to unbox; must not contain {@code null} elements
     * @return a new array with one entry per list element
     */
    static int[] toArray(List<Integer> list) {
        int[] result = new int[list.size()];
        int next = 0;
        for (Integer value : list) {
            result[next++] = value.intValue();
        }
        return result;
    }
}
