/*
 * Decompiled with CFR 0.152.
 */
package spim.process.interestpointdetection;

import java.util.List;
import mpicbg.imglib.algorithm.OutputAlgorithm;
import mpicbg.imglib.image.Image;
import mpicbg.imglib.type.numeric.real.FloatType;
import mpicbg.imglib.wrapper.ImgLib2;
import mpicbg.spim.io.IOFunctions;
import net.imglib2.Interval;
import net.imglib2.RandomAccessible;
import net.imglib2.RandomAccessibleInterval;
import net.imglib2.img.Img;
import net.imglib2.img.array.ArrayImg;
import net.imglib2.img.array.ArrayImgs;
import net.imglib2.img.basictypeaccess.array.FloatArray;
import net.imglib2.util.Util;
import net.imglib2.view.ExtendedRandomAccessibleInterval;
import net.imglib2.view.Views;
import spim.process.cuda.Block;
import spim.process.cuda.BlockGenerator;
import spim.process.cuda.BlockGeneratorVariableSizePrecise;
import spim.process.cuda.BlockGeneratorVariableSizeSimple;
import spim.process.cuda.CUDADevice;
import spim.process.cuda.CUDASeparableConvolution;
import spim.process.cuda.CUDASeparableConvolutionFunctions;
import spim.process.interestpointdetection.DifferenceOfGaussianNewPeakFinder;

/**
 * Difference-of-Gaussian peak finder whose Gaussian convolutions are computed on CUDA devices.
 *
 * <p>The class supplies {@link CUDAOutput} instances from
 * {@link #getGaussianConvolution(double[], int)}. If more than one device is passed in, the
 * first two devices are used alternately and parallel convolution is switched on; with a
 * single device both convolutions run on that device and parallel convolution is switched off.
 */
public class DifferenceOfGaussianCUDA
extends DifferenceOfGaussianNewPeakFinder {
    /** ImgLib2 image that is actually convolved on the device(s). */
    final Img<net.imglib2.type.numeric.real.FloatType> img2;
    /** All CUDA devices handed to the constructor. */
    final List<CUDADevice> devList;
    /** Native separable-convolution binding passed on to every {@link CUDAOutput}. */
    final CUDASeparableConvolution cuda;
    /** Selects the precise block generator with mirrored borders ({@code true}) or the simple one. */
    final boolean accurate;
    /** Percentage (0-100) of the free device memory that may be used. */
    final double percentGPUMem;
    /** Device used for the first of each pair of convolutions. */
    final CUDADevice cudaDev1;
    /** Device used for the second of each pair of convolutions (may equal {@link #cudaDev1}). */
    final CUDADevice cudaDev2;
    /** Toggle (0 or 1) selecting which device the next requested convolution is bound to. */
    int countCUDA = 0;

    /**
     * Creates the detector and selects the device(s) to run on.
     *
     * @param cuda native separable-convolution binding
     * @param percentGPUMem percentage (0-100) of free device memory that may be used
     * @param devList available CUDA devices; must contain at least one entry
     * @param img1 ImgLib1 image forwarded to the superclass
     * @param img2 ImgLib2 image that is convolved on the GPU
     * @param accurate whether to use the precise (mirror-extended) block generator
     * @param sigma1 first sigma per dimension, forwarded to the superclass
     * @param sigma2 second sigma per dimension, forwarded to the superclass
     * @param minPeakValue forwarded to the superclass
     * @param normalizationFactor forwarded to the superclass
     * @throws IllegalArgumentException if {@code devList} is {@code null} or empty
     */
    public DifferenceOfGaussianCUDA(CUDASeparableConvolution cuda, double percentGPUMem, List<CUDADevice> devList, Image<FloatType> img1, Img<net.imglib2.type.numeric.real.FloatType> img2, boolean accurate, double[] sigma1, double[] sigma2, double minPeakValue, double normalizationFactor) {
        super(img1, null, sigma1, sigma2, minPeakValue, normalizationFactor);
        if (devList == null || devList.isEmpty()) {
            throw new IllegalArgumentException("At least one CUDA device is required.");
        }
        this.img2 = img2;
        this.percentGPUMem = percentGPUMem;
        this.devList = devList;
        this.cuda = cuda;
        this.accurate = accurate;

        // Two or more devices: run both convolutions in parallel, one per device.
        final boolean multipleDevices = devList.size() > 1;
        this.setComputeConvolutionsParalell(multipleDevices);
        final CUDADevice first = devList.get(0);
        this.cudaDev1 = first;
        this.cudaDev2 = multipleDevices ? devList.get(1) : first;
    }

    /**
     * Returns a CUDA-backed Gaussian convolution of {@link #img2}.
     *
     * <p>Successive calls alternate between {@link #cudaDev1} and {@link #cudaDev2}.
     *
     * @param sigma sigma per dimension
     * @param numThreads not used by this implementation
     * @return a new {@link CUDAOutput} bound to the selected device
     */
    @Override
    protected OutputAlgorithm<FloatType> getGaussianConvolution(double[] sigma, int numThreads) {
        final boolean useFirstDevice = this.countCUDA == 0;
        this.countCUDA = useFirstDevice ? 1 : 0;
        final CUDADevice device = useFirstDevice ? this.cudaDev1 : this.cudaDev2;
        return new CUDAOutput(this.img2, this.percentGPUMem, device, this.cuda, this.accurate, sigma);
    }

    /**
     * Prints the block layout chosen by {@link CUDAOutput#computeNumBlocksDim} for 1 GiB of
     * available memory and required sizes of {@code i * 1000} MiB, {@code i = 1..19}, in 3 dimensions.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final long memAvail = 0x40000000L;
        for (int i = 1; i < 20; ++i) {
            final long memReq = (long)i * 1000L * 1024L * 1024L;
            CUDAOutput.computeNumBlocksDim(memAvail, memReq, 80.0, 3, "");
        }
    }

    /**
     * Gaussian convolution of an ImgLib2 float image on one CUDA device, exposed through the
     * ImgLib1 {@link OutputAlgorithm} interface. The image is split into blocks so that each
     * block fits into the permitted share of the free device memory.
     */
    public static class CUDAOutput
    implements OutputAlgorithm<FloatType> {
        /** Image to convolve. */
        final Img<net.imglib2.type.numeric.real.FloatType> img;
        /** Convolved image; created in the constructor with the same factory and dimensions as {@link #img}. */
        final Img<net.imglib2.type.numeric.real.FloatType> result;
        /** Device the convolution runs on. */
        final CUDADevice cudaDevice;
        /** Convolution functions bound to {@link #cudaDevice}. */
        final CUDASeparableConvolutionFunctions cudaconvolve;
        /** Precise (mirror-extended) blocks if {@code true}, simple blocks otherwise. */
        final boolean accurate;
        /** Sigma per dimension. */
        final double[] sigma;
        /** Percentage (0-100) of free device memory that may be used. */
        final double percentGPUMem;

        /**
         * @param img image to convolve
         * @param percentGPUMem percentage (0-100) of free device memory that may be used
         * @param cudaDevice device to run on
         * @param cuda native separable-convolution binding
         * @param accurate whether to use the precise block generator with mirrored borders
         * @param sigma sigma per dimension
         */
        public CUDAOutput(Img<net.imglib2.type.numeric.real.FloatType> img, double percentGPUMem, CUDADevice cudaDevice, CUDASeparableConvolution cuda, boolean accurate, double[] sigma) {
            this.img = img;
            this.percentGPUMem = percentGPUMem;
            this.result = img.factory().create(img, new net.imglib2.type.numeric.real.FloatType());
            this.cudaDevice = cudaDevice;
            this.accurate = accurate;
            this.sigma = sigma;
            this.cudaconvolve = new CUDASeparableConvolutionFunctions(cuda, cudaDevice.getDeviceId());
        }

        /** @return always {@code true}; no validation is performed */
        @Override
        public boolean checkInput() {
            return true;
        }

        /**
         * Convolves {@link #img} into {@link #result}.
         *
         * <p>The number of blocks is derived from the free device memory scaled by
         * {@link #percentGPUMem}. If the non-accurate mode yields exactly one block and the image
         * is an {@link ArrayImg}, the result's backing array is convolved in place; otherwise
         * every block is copied out, convolved and pasted back.
         *
         * @return always {@code true}
         */
        @Override
        public boolean process() {
            final long memAvail = Math.round((double)this.cudaDevice.getFreeDeviceMemory() * (this.percentGPUMem / 100.0));
            // 4 bytes per float pixel, times two (NOTE(review): presumably input plus output buffer - confirm).
            final long imgBytes = this.numPixels() * 4L * 2L;
            final long[] numBlocksDim = Util.int2long(CUDAOutput.computeNumBlocksDim(memAvail, imgBytes, this.percentGPUMem, this.img.numDimensions(), "CUDA-Device " + this.cudaDevice.getDeviceId()));

            final BlockGenerator<Block> generator;
            if (this.accurate) {
                generator = new BlockGeneratorVariableSizePrecise(numBlocksDim);
            } else {
                generator = new BlockGeneratorVariableSizeSimple(numBlocksDim);
            }
            final Block[] blocks = generator.divideIntoBlocks(CUDAOutput.getImgSize(this.img), CUDAOutput.getKernelSize(this.sigma));

            if (!this.accurate && blocks.length == 1 && this.img instanceof ArrayImg) {
                this.convolveSingleBlock(blocks[0]);
            } else {
                this.convolveBlockwise(blocks);
            }
            return true;
        }

        /** Copies the whole image into {@link #result} and convolves the result's backing array in place. */
        private void convolveSingleBlock(Block block) {
            IOFunctions.println("Conovlving image as one single block.");
            final long start = System.currentTimeMillis();
            final RandomAccessible<net.imglib2.type.numeric.real.FloatType> source = this.img;
            final RandomAccessibleInterval<net.imglib2.type.numeric.real.FloatType> target = this.result;
            block.copyBlock(source, target);
            final long copied = System.currentTimeMillis();
            IOFunctions.println("Copying data took " + (copied - start) + "ms");
            // result comes from img.factory(); the caller only takes this path when img is an ArrayImg.
            final float[] resultF = ((FloatArray)((ArrayImg<?, ?>)this.result).update(null)).getCurrentStorageArray();
            this.cudaconvolve.gauss(resultF, CUDAOutput.getImgSizeInt(this.result), this.sigma, CUDASeparableConvolutionFunctions.OutOfBounds.EXTEND_BORDER_PIXELS, 0.0f);
            IOFunctions.println("Convolution took " + (System.currentTimeMillis() - copied) + "ms using device=" + this.cudaDevice.getDeviceName() + " (id=" + this.cudaDevice.getDeviceId() + ")");
        }

        /** Copies each block into a temporary float array image, convolves it and pastes it into {@link #result}. */
        private void convolveBlockwise(Block[] blocks) {
            // In accurate mode blocks read from a mirror-extended view of the image.
            final RandomAccessible<net.imglib2.type.numeric.real.FloatType> input;
            if (this.accurate) {
                input = Views.extendMirrorSingle(this.img);
            } else {
                input = this.img;
            }
            final RandomAccessibleInterval<net.imglib2.type.numeric.real.FloatType> target = this.result;
            for (final Block block : blocks) {
                final ArrayImg<net.imglib2.type.numeric.real.FloatType, FloatArray> imgBlock = ArrayImgs.floats(block.getBlockSize());
                final RandomAccessibleInterval<net.imglib2.type.numeric.real.FloatType> blockView = imgBlock;
                block.copyBlock(input, blockView);
                final float[] imgBlockF = imgBlock.update(null).getCurrentStorageArray();
                this.cudaconvolve.gauss(imgBlockF, CUDAOutput.getImgSizeInt(imgBlock), this.sigma, CUDASeparableConvolutionFunctions.OutOfBounds.EXTEND_BORDER_PIXELS, 0.0f);
                block.pasteBlock(target, blockView);
            }
        }

        /** @return always the empty string */
        @Override
        public String getErrorMessage() {
            return "";
        }

        /** @return {@link #result} wrapped as an ImgLib1 image */
        @Override
        public Image<FloatType> getResult() {
            return ImgLib2.wrapFloatToImgLib1(this.result);
        }

        /**
         * @param sigma sigma per dimension
         * @return length of the 1D Gaussian kernel created for each sigma, as {@code long}s
         */
        protected static long[] getKernelSize(double[] sigma) {
            return Util.int2long(CUDAOutput.getKernelSizeInt(sigma));
        }

        /**
         * @param img interval to measure
         * @return the size of {@code img} in every dimension
         */
        public static long[] getImgSize(Interval img) {
            final int n = img.numDimensions();
            final long[] dim = new long[n];
            for (int d = 0; d < n; ++d) {
                dim[d] = img.dimension(d);
            }
            return dim;
        }

        /**
         * @param sigma sigma per dimension
         * @return length of the 1D Gaussian kernel created for each sigma
         */
        protected static int[] getKernelSizeInt(double[] sigma) {
            final int[] dim = new int[sigma.length];
            for (int d = 0; d < sigma.length; ++d) {
                dim[d] = mpicbg.imglib.util.Util.createGaussianKernel1DDouble(sigma[d], false).length;
            }
            return dim;
        }

        /**
         * @param img interval to measure
         * @return the size of {@code img} in every dimension, narrowed to {@code int}
         *         (sizes above {@link Integer#MAX_VALUE} are silently truncated by the cast)
         */
        public static int[] getImgSizeInt(Interval img) {
            final int n = img.numDimensions();
            final int[] dim = new int[n];
            for (int d = 0; d < n; ++d) {
                dim[d] = (int)img.dimension(d);
            }
            return dim;
        }

        /**
         * Chooses how many blocks to use along each dimension so that the total number of blocks
         * is at least {@code ceil(memReq / memAvail)}.
         *
         * <p>The search starts from {@code floor(numBlocks^(1/n)) + 1} blocks per dimension, whose
         * product is never smaller than the required count, and then repeatedly perturbs and
         * shrinks the grid until the total no longer decreases.
         *
         * @param memAvail usable device memory in bytes; must be positive
         * @param memReq memory required for the whole image in bytes
         * @param percentGPUMem percentage (0-100) used to obtain {@code memAvail}; only used for logging
         * @param n number of dimensions
         * @param start prefix of the log line, or {@code null} to suppress logging
         * @return number of blocks per dimension
         * @throws IllegalArgumentException if {@code memAvail} is not positive
         */
        public static int[] computeNumBlocksDim(long memAvail, long memReq, double percentGPUMem, int n, String start) {
            if (memAvail <= 0L) {
                throw new IllegalArgumentException("Available memory must be positive, but was " + memAvail + " bytes.");
            }
            // Ceiling division: add one block if the memory does not divide evenly.
            final int numBlocks = (int)(memReq / memAvail + Math.min(1L, memReq % memAvail));
            // Floating-point exponent is required; integer 1 / n is 0 for every n > 1.
            final double blocksPerDim = Math.pow(numBlocks, 1.0 / n);
            final int[] numBlocksDim = new int[n];
            final int initial = (int)Math.round(Math.floor(blocksPerDim)) + 1;
            for (int d = 0; d < numBlocksDim.length; ++d) {
                numBlocksDim[d] = initial;
            }

            int numBlocksCurrent;
            do {
                numBlocksCurrent = CUDAOutput.numBlocks(numBlocksDim);
                for (int d = 0; d < numBlocksDim.length; ++d) {
                    // Grow one dimension, then let the reduction find a smaller valid grid.
                    ++numBlocksDim[d];
                    CUDAOutput.reduceBlockNumbers(numBlocksDim, numBlocks);
                }
            } while (CUDAOutput.numBlocks(numBlocksDim) < numBlocksCurrent);

            if (start != null) {
                final StringBuilder out = new StringBuilder();
                out.append(start).append(", mem=").append(memAvail / 0x100000L).append("MB (").append(Math.round(percentGPUMem)).append("%), required mem=").append(memReq / 0x100000L).append("MB, need to split up into ").append(numBlocks).append(" blocks: ");
                for (int d = 0; d < numBlocksDim.length; ++d) {
                    if (d > 0) {
                        out.append("x");
                    }
                    out.append(numBlocksDim[d]);
                }
                IOFunctions.println(out.toString());
            }
            return numBlocksDim;
        }

        /**
         * Decrements entries of {@code numBlocksDim} in place, scanning from the last dimension to
         * the first, for as long as the product of all entries stays at least {@code numBlocks}.
         * Entries are never reduced below 1.
         *
         * @param numBlocksDim blocks per dimension; modified in place
         * @param numBlocks minimum total number of blocks to keep
         */
        protected static void reduceBlockNumbers(int[] numBlocksDim, int numBlocks) {
            boolean reduced;
            do {
                reduced = false;
                for (int d = numBlocksDim.length - 1; d >= 0; --d) {
                    if (numBlocksDim[d] <= 1) {
                        continue;
                    }
                    --numBlocksDim[d];
                    if (CUDAOutput.numBlocks(numBlocksDim) < numBlocks) {
                        // Too few blocks now: undo the decrement.
                        ++numBlocksDim[d];
                    } else {
                        reduced = true;
                    }
                }
            } while (reduced);
        }

        /**
         * @param numBlocksDim blocks per dimension
         * @return product of all entries (1 for an empty array)
         */
        protected static int numBlocks(int[] numBlocksDim) {
            int product = 1;
            for (final int blocks : numBlocksDim) {
                product *= blocks;
            }
            return product;
        }

        /**
         * @return number of pixels used for the memory estimate: in accurate mode each dimension
         *         is enlarged by the kernel length minus one, otherwise the plain image size
         */
        protected long numPixels() {
            if (!this.accurate) {
                return this.img.size();
            }
            long size = 1L;
            for (int d = 0; d < this.img.numDimensions(); ++d) {
                final long kernelLength = mpicbg.imglib.util.Util.createGaussianKernel1DDouble(this.sigma[d], false).length;
                size *= this.img.dimension(d) + kernelLength - 1L;
            }
            return size;
        }
    }
}

