/*
 * Decompiled with CFR 0.152.
 */
package edu.mines.jtk.dsp;

import edu.mines.jtk.dsp.Tensors2;
import edu.mines.jtk.dsp.Tensors3;
import edu.mines.jtk.util.ArrayMath;
import edu.mines.jtk.util.Parallel;

public class LocalDiffusionKernel {
    /**
     * Tensor field used by the 2D apply method when the caller supplies no
     * tensors. For every sample it writes {1, 0, 1} into the output array;
     * the 2D kernels read those three elements as d11, d12 and d22.
     * Declared final because it is a shared constant that is never meant to
     * be reassigned.
     */
    private static final Tensors2 IDENTITY_TENSORS2 = new Tensors2(){

        @Override
        public void getTensor(int i1, int i2, float[] d) {
            d[0] = 1.0f;
            d[1] = 0.0f;
            d[2] = 1.0f;
        }
    };
    /**
     * Tensor field used by the 3D apply method when the caller supplies no
     * tensors. For every sample it writes {1, 0, 0, 1, 0, 1} into the output
     * array; the 3D kernels read those six elements as d11, d12, d13, d22,
     * d23 and d33. Declared final because it is a shared constant that is
     * never meant to be reassigned.
     */
    private static final Tensors3 IDENTITY_TENSORS3 = new Tensors3(){

        @Override
        public void getTensor(int i1, int i2, int i3, float[] d) {
            d[0] = 1.0f;
            d[1] = 0.0f;
            d[2] = 0.0f;
            d[3] = 1.0f;
            d[4] = 0.0f;
            d[5] = 1.0f;
        }
    };
    // Stencil chosen at construction; the apply methods compare against it to pick a kernel.
    private Stencil _stencil;
    // Number of times the kernel is repeated per call to apply (see setNumberOfPasses).
    private int _npass = 1;
    // When true, the 3D apply method uses applyParallel; otherwise applySerial.
    private boolean _parallel = true;
    // Coefficients for the D71 kernels, which read elements 1..3 as c1, c2, c3.
    private static final float[] C71 = new float[]{0.0f, 0.830893f, -0.227266f, 0.042877f};
    // NOTE(review): presumably the corresponding coefficients for the D91 stencil; its kernel is not in view here.
    private static final float[] C91 = new float[]{0.0f, 0.8947167f, -0.3153471f, 0.1096895f, -0.0259358f};

    /**
     * Constructs a kernel that uses the stencil {@code Stencil.D22}.
     */
    public LocalDiffusionKernel() {
        this(Stencil.D22);
    }

    /**
     * Constructs a kernel that uses the specified stencil.
     * @param s the stencil; stored as given and consulted by every later
     *  call to an apply method.
     */
    public LocalDiffusionKernel(Stencil s) {
        _stencil = s;
    }

    /**
     * Gets the stencil this kernel was constructed with.
     * @return the stencil.
     */
    public Stencil getStencil() {
        return _stencil;
    }

    /**
     * Sets how many times each call to an apply method repeats the kernel.
     * The initial value is one. No validation is performed; with a value
     * less than one the apply methods run zero passes.
     * @param npass the number of passes.
     */
    public void setNumberOfPasses(int npass) {
        _npass = npass;
    }

    /**
     * Applies this kernel to a 2D array with no tensors and constant scale
     * factor one. Delegates to the (tensors, scale, x, y) overload.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(float[][] x, float[][] y) {
        apply((Tensors2) null, 1.0f, x, y);
    }

    /**
     * Applies this kernel to a 2D array with the specified tensors and
     * constant scale factor one.
     * @param d tensors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(Tensors2 d, float[][] x, float[][] y) {
        apply(d, 1.0f, x, y);
    }

    /**
     * Applies this kernel to a 2D array with no tensors, the specified
     * constant scale factor, and no per-sample scale factors.
     * @param c constant scale factor.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(float c, float[][] x, float[][] y) {
        apply((Tensors2) null, c, (float[][]) null, x, y);
    }

    /**
     * Applies this kernel to a 2D array with the specified tensors and
     * constant scale factor, and no per-sample scale factors.
     * @param d tensors; may be null.
     * @param c constant scale factor.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(Tensors2 d, float c, float[][] x, float[][] y) {
        apply(d, c, (float[][]) null, x, y);
    }

    /**
     * Applies this kernel to a 2D array with no tensors, the specified
     * constant scale factor, and the specified per-sample scale factors.
     * @param c constant scale factor.
     * @param s per-sample scale factors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(float c, float[][] s, float[][] x, float[][] y) {
        apply((Tensors2) null, c, s, x, y);
    }

    /**
     * Applies this kernel to a 2D array, once per configured pass.
     * The kernel selected by the stencil is invoked with (x, y); the kernels
     * visible in this file add their output to y rather than overwrite it.
     * For every pass after the first, the input is a copy of y taken at the
     * start of that pass. A stencil that matches none of D21, D22, D24, D33,
     * D71 or D91 results in no work.
     * @param d tensors; if null, identity tensors are used.
     * @param c constant scale factor.
     * @param s per-sample scale factors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        Tensors2 tensors = (d != null) ? d : IDENTITY_TENSORS2;
        float[][] input = x;
        for (int ipass = 0; ipass < _npass; ++ipass) {
            // Later passes read a snapshot of the output accumulated so far.
            if (ipass > 0) {
                input = ArrayMath.copy(y);
            }
            if (_stencil == Stencil.D21) {
                // The D21 kernel takes no tensors.
                apply21(c, s, input, y);
            } else if (_stencil == Stencil.D22) {
                apply22(tensors, c, s, input, y);
            } else if (_stencil == Stencil.D24) {
                apply24(tensors, c, s, input, y);
            } else if (_stencil == Stencil.D33) {
                apply33(tensors, c, s, input, y);
            } else if (_stencil == Stencil.D71) {
                apply71(tensors, c, s, input, y);
            } else if (_stencil == Stencil.D91) {
                apply91(tensors, c, s, input, y);
            }
        }
    }

    /**
     * Applies this kernel to a 3D array with no tensors and constant scale
     * factor one. Delegates to the (tensors, scale, x, y) overload.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(float[][][] x, float[][][] y) {
        apply((Tensors3) null, 1.0f, x, y);
    }

    /**
     * Applies this kernel to a 3D array with the specified tensors and
     * constant scale factor one.
     * @param d tensors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(Tensors3 d, float[][][] x, float[][][] y) {
        apply(d, 1.0f, x, y);
    }

    /**
     * Applies this kernel to a 3D array with no tensors, the specified
     * constant scale factor, and no per-sample scale factors.
     * @param c constant scale factor.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(float c, float[][][] x, float[][][] y) {
        apply((Tensors3) null, c, (float[][][]) null, x, y);
    }

    /**
     * Applies this kernel to a 3D array with the specified tensors and
     * constant scale factor, and no per-sample scale factors.
     * @param d tensors; may be null.
     * @param c constant scale factor.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(Tensors3 d, float c, float[][][] x, float[][][] y) {
        apply(d, c, (float[][][]) null, x, y);
    }

    /**
     * Applies this kernel to a 3D array with no tensors, the specified
     * constant scale factor, and the specified per-sample scale factors.
     * @param c constant scale factor.
     * @param s per-sample scale factors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(float c, float[][][] s, float[][][] x, float[][][] y) {
        apply((Tensors3) null, c, s, x, y);
    }

    /**
     * Applies this kernel to a 3D array, once per configured pass.
     * The stencil determines the first i3 index, the stride between i3
     * indices handled together in the parallel path, and the exclusive upper
     * bound on i3. The serial path visits every i3 from the first index up
     * to the bound with stride one. For every pass after the first, the
     * input is a copy of y taken at the start of that pass.
     * @param d tensors; if null, identity tensors are used.
     * @param c constant scale factor.
     * @param s per-sample scale factors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    public void apply(Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        int n3 = x.length;
        Tensors3 tensors = (d != null) ? d : IDENTITY_TENSORS3;

        // Range used when the stencil has no entry below.
        int first = 0;
        int stride = 1;
        int bound = n3;
        if (_stencil == Stencil.D21) {
            stride = 2;
        } else if (_stencil == Stencil.D22) {
            first = 1;
            stride = 2;
        } else if (_stencil == Stencil.D24) {
            first = 1;
            stride = 4;
        } else if (_stencil == Stencil.D33) {
            first = 1;
            stride = 3;
            bound = n3 - 1;
        } else if (_stencil == Stencil.D71) {
            stride = 7;
        }

        float[][][] input = x;
        for (int ipass = 0; ipass < _npass; ++ipass) {
            // Later passes read a snapshot of the output accumulated so far.
            if (ipass > 0) {
                input = ArrayMath.copy(y);
            }
            if (_parallel) {
                applyParallel(first, stride, bound, tensors, c, s, input, y);
            } else {
                applySerial(first, 1, bound, tensors, c, s, input, y);
            }
        }
    }

    /**
     * Prints the specified string, followed by a line terminator, to
     * standard output.
     */
    private static void trace(String s) {
        System.out.println(s);
    }

    /**
     * Applies the 3D kernel selected by the stencil for a single index i3.
     * The D21 kernel is called without tensors. A stencil that matches none
     * of the listed values results in no work.
     * @throws UnsupportedOperationException if the stencil is D24 or D91.
     */
    private void apply(int i3, Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        if (_stencil == Stencil.D21) {
            apply21(i3, c, s, x, y);
        } else if (_stencil == Stencil.D22) {
            apply22(i3, d, c, s, x, y);
        } else if (_stencil == Stencil.D24) {
            throw new UnsupportedOperationException("Stencil.D24 not supported for 3D arrays");
        } else if (_stencil == Stencil.D33) {
            apply33(i3, d, c, s, x, y);
        } else if (_stencil == Stencil.D71) {
            apply71(i3, d, c, s, x, y);
        } else if (_stencil == Stencil.D91) {
            throw new UnsupportedOperationException("Stencil.D91 not supported for 3D arrays");
        }
    }

    /**
     * Applies the per-index 3D kernel sequentially for
     * i3 = i3start, i3start+i3step, ... while i3 is less than i3stop.
     */
    private void applySerial(int i3start, int i3step, int i3stop, Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        int i3 = i3start;
        while (i3 < i3stop) {
            apply(i3, d, c, s, x, y);
            i3 += i3step;
        }
    }

    /**
     * Applies the per-index 3D kernel in i3step rounds. Round k hands
     * Parallel.loop the indices i3start+k, i3start+k+i3step, ... below
     * i3stop, so that after all rounds every index from i3start up to
     * i3stop has been processed once.
     * NOTE(review): presumably the stride keeps indices processed in the
     * same round from updating the same elements of y; confirm against
     * each kernel's footprint.
     */
    private void applyParallel(int i3start, int i3step, int i3stop, final Tensors3 d, final float c, final float[][][] s, final float[][][] x, final float[][][] y) {
        for (int i3pass = 0; i3pass < i3step; ++i3pass) {
            Parallel.LoopInt body = new Parallel.LoopInt(){

                @Override
                public void compute(int i3) {
                    LocalDiffusionKernel.this.apply(i3, d, c, s, x, y);
                }
            };
            Parallel.loop(i3start, i3stop, i3step, body);
            // The next round starts one index further along.
            ++i3start;
        }
    }

    /**
     * 2D kernel for stencil D21. For each sample it forms the backward
     * differences of x along each dimension (the neighbour index is clamped
     * to zero at the low edge, which makes that difference zero), scales
     * them, and adds/subtracts the results into y at the sample and at the
     * neighbour. When s is non-null each difference is scaled by c times
     * the average of s at the two samples involved; otherwise by c.
     */
    private void apply21(float c, float[][] s, float[][] x, float[][] y) {
        int n1 = x[0].length;
        int n2 = x.length;
        for (int i2 = 0; i2 < n2; ++i2) {
            int m2 = (i2 > 0) ? i2 - 1 : 0;
            for (int i1 = 0; i1 < n1; ++i1) {
                int m1 = (i1 > 0) ? i1 - 1 : 0;
                float cs1 = c;
                float cs2 = c;
                if (s != null) {
                    cs1 *= 0.5f * (s[i2][i1] + s[i2][m1]);
                    cs2 *= 0.5f * (s[i2][i1] + s[m2][i1]);
                }
                float dx1 = x[i2][i1] - x[i2][m1];
                float dx2 = x[i2][i1] - x[m2][i1];
                float y1 = cs1 * dx1;
                float y2 = cs2 * dx2;
                y[i2][i1] += y1 + y2;
                y[i2][m1] -= y1;
                y[m2][i1] -= y2;
            }
        }
    }

    /**
     * 3D kernel for stencil D21, for one index i3. For each sample in the
     * slab it forms the backward differences of x along all three
     * dimensions (neighbour indices are clamped to zero at the low edges),
     * scales them, and adds/subtracts the results into y at the sample and
     * at each neighbour. Writes touch y[i3] and y[i3-1] (or only y[0] when
     * i3 is zero).
     */
    private void apply21(int i3, float c, float[][][] s, float[][][] x, float[][][] y) {
        int n1 = x[0][0].length;
        int n2 = x[0].length;
        int m3 = (i3 > 0) ? i3 - 1 : 0;
        for (int i2 = 0; i2 < n2; ++i2) {
            int m2 = (i2 > 0) ? i2 - 1 : 0;
            for (int i1 = 0; i1 < n1; ++i1) {
                int m1 = (i1 > 0) ? i1 - 1 : 0;
                float cs1 = c;
                float cs2 = c;
                float cs3 = c;
                if (s != null) {
                    cs1 *= 0.5f * (s[i3][i2][i1] + s[i3][i2][m1]);
                    cs2 *= 0.5f * (s[i3][i2][i1] + s[i3][m2][i1]);
                    cs3 *= 0.5f * (s[i3][i2][i1] + s[m3][i2][i1]);
                }
                float dx1 = x[i3][i2][i1] - x[i3][i2][m1];
                float dx2 = x[i3][i2][i1] - x[i3][m2][i1];
                float dx3 = x[i3][i2][i1] - x[m3][i2][i1];
                float y1 = cs1 * dx1;
                float y2 = cs2 * dx2;
                float y3 = cs3 * dx3;
                y[i3][i2][i1] += y1 + y2 + y3;
                y[i3][i2][m1] -= y1;
                y[i3][m2][i1] -= y2;
                y[m3][i2][i1] -= y3;
            }
        }
    }

    /**
     * 2D kernel for stencil D22. For each 2x2 cell whose upper corner is
     * (i1, i2), with i1 and i2 starting at one, it combines the two
     * diagonal differences of x into x1 and x2, multiplies by the scaled
     * tensor fetched at (i1, i2), and scatters the result into the four
     * corners of the cell in y. The scale is 0.25*c, times s[i2][i1] when s
     * is non-null.
     */
    private void apply22(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        c *= 0.25f;
        int n1 = x[0].length;
        int n2 = x.length;
        float[] di = new float[3];
        for (int i2 = 1; i2 < n2; ++i2) {
            float[] x0 = x[i2];
            float[] xm = x[i2 - 1];
            float[] y0 = y[i2];
            float[] ym = y[i2 - 1];
            for (int i1 = 1; i1 < n1; ++i1) {
                int m1 = i1 - 1;
                d.getTensor(i1, i2, di);
                float csi = (s != null) ? c * s[i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d22 = di[2] * csi;
                // Differences across the two diagonals of the cell.
                float xa = x0[i1] - xm[m1];
                float xb = x0[m1] - xm[i1];
                float x1 = xa - xb;
                float x2 = xa + xb;
                float y1 = d11 * x1 + d12 * x2;
                float y2 = d12 * x1 + d22 * x2;
                float ya = y1 + y2;
                float yb = y1 - y2;
                y0[i1] += ya;
                y0[m1] -= yb;
                ym[i1] += yb;
                ym[m1] -= ya;
            }
        }
    }

    /**
     * Variant of the 2D D22 kernel that carries the x and y values of the
     * current 2x2 cell in local variables instead of re-reading the arrays.
     * Here "p" rows are x[i2]/y[i2] and "m" rows are x[i2-1]/y[i2-1]; the
     * second letter of each local names the column (m for m1, p for i1).
     * Values of y for column m1 are stored inside the loop; the values for
     * the last column are stored after the loop.
     */
    private void apply22X(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        c *= 0.25f;
        int n1 = x[0].length;
        int n2 = x.length;
        float[] di = new float[3];
        for (int i2 = 1; i2 < n2; ++i2) {
            float[] xp = x[i2];
            float[] xm = x[i2 - 1];
            float[] yp = y[i2];
            float[] ym = y[i2 - 1];
            // Prime the column-i1 locals with column 0; they become column m1 on the first iteration.
            float xmp = xm[0];
            float xpp = xp[0];
            float ymp = ym[0];
            float ypp = yp[0];
            int i1 = 1;
            int m1 = 0;
            while (i1 < n1) {
                // Shift: the previous column-i1 values become the column-m1 values.
                float xmm = xmp;
                float xpm = xpp;
                xmp = xm[i1];
                xpp = xp[i1];
                float ymm = ymp;
                float ypm = ypp;
                ymp = ym[i1];
                ypp = yp[i1];
                d.getTensor(i1, i2, di);
                float csi = s != null ? c * s[i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d22 = di[2] * csi;
                float xa = xpp - xmm;
                float xb = xpm - xmp;
                float x1 = xa - xb;
                float x2 = xa + xb;
                float y1 = d11 * x1 + d12 * x2;
                float y2 = d12 * x1 + d22 * x2;
                float ya = y1 + y2;
                float yb = y1 - y2;
                ypp += ya;
                ymp += yb;
                // Column m1 receives no further updates in this row pair, so store it now.
                ym[m1] = ymm -= ya;
                yp[m1] = ypm -= yb;
                ++i1;
                ++m1;
            }
            // Store the values still held in locals for the last column.
            ym[n1 - 1] = ymp;
            yp[n1 - 1] = ypp;
        }
    }

    /**
     * 3D kernel for stencil D22, for one index i3. It reads x[i3] and
     * x[i3-1] and updates y[i3] and y[i3-1], so i3 must be at least one.
     * For each 2x2x2 cell it combines the four diagonal differences of x
     * into x1, x2 and x3, multiplies by the scaled tensor fetched at
     * (i1, i2, i3), and scatters the result into the eight corners of the
     * cell in y. The scale is 0.0625*c, times s[i3][i2][i1] when s is
     * non-null.
     */
    private void apply22(int i3, Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        c *= 0.0625f;
        int n1 = x[0][0].length;
        int n2 = x[0].length;
        float[] di = new float[6];
        for (int i2 = 1; i2 < n2; ++i2) {
            // Rows are named by (i3 offset, i2 offset): 0 = current, m = minus one.
            float[] x00 = x[i3][i2];
            float[] x0m = x[i3][i2 - 1];
            float[] xm0 = x[i3 - 1][i2];
            float[] xmm = x[i3 - 1][i2 - 1];
            float[] y00 = y[i3][i2];
            float[] y0m = y[i3][i2 - 1];
            float[] ym0 = y[i3 - 1][i2];
            float[] ymm = y[i3 - 1][i2 - 1];
            for (int i1 = 1; i1 < n1; ++i1) {
                int m1 = i1 - 1;
                d.getTensor(i1, i2, i3, di);
                float csi = (s != null) ? c * s[i3][i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d13 = di[2] * csi;
                float d22 = di[3] * csi;
                float d23 = di[4] * csi;
                float d33 = di[5] * csi;
                // Differences across the four body diagonals of the cell.
                float xa = x00[i1] - xmm[m1];
                float xb = x00[m1] - xmm[i1];
                float xc = x0m[i1] - xm0[m1];
                float xd = xm0[i1] - x0m[m1];
                float x1 = xa - xb + xc + xd;
                float x2 = xa + xb - xc + xd;
                float x3 = xa + xb + xc - xd;
                float y1 = d11 * x1 + d12 * x2 + d13 * x3;
                float y2 = d12 * x1 + d22 * x2 + d23 * x3;
                float y3 = d13 * x1 + d23 * x2 + d33 * x3;
                float ya = y1 + y2 + y3;
                y00[i1] += ya;
                ymm[m1] -= ya;
                float yb = y1 - y2 + y3;
                y0m[i1] += yb;
                ym0[m1] -= yb;
                float yc = y1 + y2 - y3;
                ym0[i1] += yc;
                y0m[m1] -= yc;
                float yd = y1 - y2 - y3;
                ymm[i1] += yd;
                y00[m1] -= yd;
            }
        }
    }

    /**
     * 2D kernel for stencil D24. For each cell with upper corner (i1, i2),
     * i1 and i2 starting at one, it forms x1 and x2 from the four corners
     * of the cell plus b-weighted samples one step further out in each
     * dimension, multiplies by the scaled tensor fetched at (i1, i2), and
     * scatters the result into the same samples of y. Indices one step
     * outside the cell are clamped to the array bounds.
     *
     * Fix: the row index i2+1 is now clamped against n2 (the number of
     * rows, x.length). It was previously clamped against n1 (the row
     * length), which indexed past the end of x when n1 exceeded n2 and
     * selected the wrong rows when n1 was smaller than n2.
     *
     * @param d tensors; must not be null.
     * @param c constant scale factor.
     * @param s per-sample scale factors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    private void apply24(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        float p = 0.18f;
        float a = 0.5f * (1.0f + p);
        float b = 0.5f * -p;
        b /= a;
        c *= a * a;
        int n1 = x[0].length;
        int n2 = x.length;
        float[] di = new float[3];
        for (int i2 = 1; i2 < n2; ++i2) {
            // Row indices i2-2, i2-1, i2, i2+1, clamped to [0, n2-1].
            int i2m2 = Math.max(i2 - 2, 0);
            int i2m1 = i2 - 1;
            int i2p0 = i2;
            int i2p1 = Math.min(i2 + 1, n2 - 1);
            float[] xm2 = x[i2m2];
            float[] xm1 = x[i2m1];
            float[] xp0 = x[i2p0];
            float[] xp1 = x[i2p1];
            float[] ym2 = y[i2m2];
            float[] ym1 = y[i2m1];
            float[] yp0 = y[i2p0];
            float[] yp1 = y[i2p1];
            for (int i1 = 1; i1 < n1; ++i1) {
                // Column indices i1-2, i1-1, i1, i1+1, clamped to [0, n1-1].
                int m2 = Math.max(i1 - 2, 0);
                int m1 = i1 - 1;
                int p0 = i1;
                int p1 = Math.min(i1 + 1, n1 - 1);
                d.getTensor(i1, i2, di);
                float csi = (s != null) ? c * s[i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d22 = di[2] * csi;
                float xa = xp0[p0] - xm1[m1];
                float xb = xm1[p0] - xp0[m1];
                float x1 = xa + xb + b * (xp1[p0] + xm2[p0] - xp1[m1] - xm2[m1]);
                float x2 = xa - xb + b * (xp0[p1] + xp0[m2] - xm1[p1] - xm1[m2]);
                float y1 = d11 * x1 + d12 * x2;
                float y2 = d12 * x1 + d22 * x2;
                float ya = y1 + y2;
                float yb = y1 - y2;
                float yc = b * y1;
                float yd = b * y2;
                // Scatter with the same weights used to gather x1 and x2.
                yp0[p0] += ya;
                ym1[m1] -= ya;
                ym1[p0] += yb;
                yp0[m1] -= yb;
                yp1[p0] += yc;
                ym2[m1] -= yc;
                ym2[p0] += yc;
                yp1[m1] -= yc;
                yp0[p1] += yd;
                ym1[m2] -= yd;
                yp0[m2] += yd;
                ym1[p1] -= yd;
            }
        }
    }

    /**
     * 2D kernel for stencil D33. For each interior sample (i1, i2) it forms
     * x1 and x2 from the eight neighbours in the surrounding 3x3 window
     * (corner differences weighted by b), multiplies by the scaled tensor
     * fetched at (i1, i2), and adds the result into the same eight
     * neighbours of y.
     *
     * The 3x3 windows of x and y are carried in local variables that are
     * shifted by one column per iteration. The column of y that leaves the
     * window is stored inside the loop; the last two columns are stored
     * after the loop.
     *
     * Fix: returns immediately when a row has fewer than three samples.
     * There is then no interior sample to process, and the unconditional
     * reads of column 1 previously threw ArrayIndexOutOfBoundsException for
     * rows shorter than two samples.
     *
     * @param d tensors; must not be null.
     * @param c constant scale factor.
     * @param s per-sample scale factors; may be null.
     * @param x input array.
     * @param y array to which the output is added.
     */
    private void apply33(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        float p = 0.182962f;
        float a = 0.5f - p;
        float b = 0.5f * p;
        b /= a;
        c *= a * a;
        int n1 = x[0].length;
        int n2 = x.length;
        if (n1 < 3) {
            // No interior column exists, so y is left unchanged.
            return;
        }
        float[] di = new float[3];
        for (int i2 = 1; i2 < n2 - 1; ++i2) {
            float[] xm = x[i2 - 1];
            float[] x0 = x[i2];
            float[] xp = x[i2 + 1];
            float[] ym = y[i2 - 1];
            float[] y0 = y[i2];
            float[] yp = y[i2 + 1];
            // Prime the window with columns 0 and 1; the first shift moves them to columns m and 0.
            float xm0 = xm[0];
            float xmp = xm[1];
            float x00 = x0[0];
            float x0p = x0[1];
            float xp0 = xp[0];
            float xpp = xp[1];
            float ym0 = ym[0];
            float ymp = ym[1];
            float y00 = y0[0];
            float y0p = y0[1];
            float yp0 = yp[0];
            float ypp = yp[1];
            for (int i1p = 2; i1p < n1; ++i1p) {
                int i1 = i1p - 1;
                int i1m = i1p - 2;
                // Shift the x window one column and load column i1p.
                float xmm = xm0;
                xm0 = xmp;
                xmp = xm[i1p];
                float x0m = x00;
                x00 = x0p;
                x0p = x0[i1p];
                float xpm = xp0;
                xp0 = xpp;
                xpp = xp[i1p];
                // Shift the y window the same way.
                float ymm = ym0;
                ym0 = ymp;
                ymp = ym[i1p];
                float y0m = y00;
                y00 = y0p;
                y0p = y0[i1p];
                float ypm = yp0;
                yp0 = ypp;
                ypp = yp[i1p];
                d.getTensor(i1, i2, di);
                float csi = (s != null) ? c * s[i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d22 = di[2] * csi;
                float xa = b * (xpp - xmm);
                float xb = b * (xmp - xpm);
                float x1 = x0p - x0m + xa + xb;
                float x2 = xp0 - xm0 + xa - xb;
                float y1 = d11 * x1 + d12 * x2;
                float y2 = d12 * x1 + d22 * x2;
                float ya = b * (y1 + y2);
                float yb = b * (y1 - y2);
                y0p += y1;
                y0m -= y1;
                ypp += ya;
                ymm -= ya;
                ymp += yb;
                ypm -= yb;
                yp0 += y2;
                ym0 -= y2;
                // Column i1m leaves the window after this iteration, so store it now.
                ym[i1m] = ymm;
                y0[i1m] = y0m;
                yp[i1m] = ypm;
            }
            // Store the two columns still held in the window.
            ym[n1 - 2] = ym0;
            ym[n1 - 1] = ymp;
            y0[n1 - 2] = y00;
            y0[n1 - 1] = y0p;
            yp[n1 - 2] = yp0;
            yp[n1 - 1] = ypp;
        }
    }

    /**
     * Variant of the 2D D33 kernel that reads and updates the arrays
     * directly for every sample. For each interior sample (i1, i2) it forms
     * x1 and x2 from the eight neighbours in the surrounding 3x3 window
     * (corner differences weighted by b), multiplies by the scaled tensor
     * fetched at (i1, i2), and adds the result into the same eight
     * neighbours of y.
     */
    private void apply33X(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        float p = 0.182962f;
        float a = 0.5f - p;
        float b = 0.5f * p;
        b /= a;
        c *= a * a;
        int n1 = x[0].length;
        int n2 = x.length;
        float[] di = new float[3];
        for (int i2 = 1; i2 < n2 - 1; ++i2) {
            float[] xm = x[i2 - 1];
            float[] x0 = x[i2];
            float[] xp = x[i2 + 1];
            float[] ym = y[i2 - 1];
            float[] y0 = y[i2];
            float[] yp = y[i2 + 1];
            for (int i1 = 1; i1 < n1 - 1; ++i1) {
                int m1 = i1 - 1;
                int p1 = i1 + 1;
                d.getTensor(i1, i2, di);
                float csi = (s != null) ? c * s[i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d22 = di[2] * csi;
                float xa = b * (xp[p1] - xm[m1]);
                float xb = b * (xm[p1] - xp[m1]);
                float x1 = x0[p1] - x0[m1] + xa + xb;
                float x2 = xp[i1] - xm[i1] + xa - xb;
                float y1 = d11 * x1 + d12 * x2;
                float y2 = d12 * x1 + d22 * x2;
                float ya = b * (y1 + y2);
                float yb = b * (y1 - y2);
                y0[p1] += y1;
                y0[m1] -= y1;
                yp[p1] += ya;
                ym[m1] -= ya;
                ym[p1] += yb;
                yp[m1] -= yb;
                yp[i1] += y2;
                ym[i1] -= y2;
            }
        }
    }

    /**
     * 3D kernel for stencil D33, for one index i3. It reads x[i3-1..i3+1]
     * and updates y[i3-1..i3+1], so i3 must be an interior index. For each
     * interior sample (i1, i2) of the slab it forms x1, x2 and x3 from the
     * 26 neighbours in the surrounding 3x3x3 window, with weights aa, ab
     * and bb for neighbours that differ from the centre in one, two and
     * three indices, multiplies by the scaled tensor fetched at
     * (i1, i2, i3), and adds the result into the same neighbours of y.
     *
     * Rows are named by (i3 offset, i2 offset) with m = minus one,
     * 0 = same, p = plus one; difference locals append the i1 offset, so
     * for example x0pp0pm is x[i3][i2+1][i1+1] - x[i3][i2+1][i1-1].
     */
    private void apply33(int i3, Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        float p = 0.174654f;
        float a = 1.0f - 2.0f * p;
        float b = p;
        float aa = 0.5f * a * a;
        float ab = 0.5f * a * b;
        float bb = 0.5f * b * b;
        int n1 = x[0][0].length;
        int n2 = x[0].length;
        float[] di = new float[6];
        for (int i2 = 1; i2 < n2 - 1; ++i2) {
            float[] xmm = x[i3 - 1][i2 - 1];
            float[] xm0 = x[i3 - 1][i2];
            float[] xmp = x[i3 - 1][i2 + 1];
            float[] x0m = x[i3][i2 - 1];
            float[] x00 = x[i3][i2];
            float[] x0p = x[i3][i2 + 1];
            float[] xpm = x[i3 + 1][i2 - 1];
            float[] xp0 = x[i3 + 1][i2];
            float[] xpp = x[i3 + 1][i2 + 1];
            float[] ymm = y[i3 - 1][i2 - 1];
            float[] ym0 = y[i3 - 1][i2];
            float[] ymp = y[i3 - 1][i2 + 1];
            float[] y0m = y[i3][i2 - 1];
            float[] y00 = y[i3][i2];
            float[] y0p = y[i3][i2 + 1];
            float[] ypm = y[i3 + 1][i2 - 1];
            float[] yp0 = y[i3 + 1][i2];
            float[] ypp = y[i3 + 1][i2 + 1];
            for (int i1 = 1; i1 < n1 - 1; ++i1) {
                int m1 = i1 - 1;
                int p1 = i1 + 1;
                d.getTensor(i1, i2, i3, di);
                float csi = (s != null) ? c * s[i3][i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d13 = di[2] * csi;
                float d22 = di[3] * csi;
                float d23 = di[4] * csi;
                float d33 = di[5] * csi;

                // Differences between neighbours that differ in one index.
                float x00p00m = x00[p1] - x00[m1];
                float x0p00m0 = x0p[i1] - x0m[i1];
                float xp00m00 = xp0[i1] - xm0[i1];
                // Differences between neighbours that differ from the centre in two indices.
                float xmp0mm0 = xmp[i1] - xmm[i1];
                float xpp0pm0 = xpp[i1] - xpm[i1];
                float xpm0mm0 = xpm[i1] - xmm[i1];
                float xpp0mp0 = xpp[i1] - xmp[i1];
                float xm0pm0m = xm0[p1] - xm0[m1];
                float xp0pp0m = xp0[p1] - xp0[m1];
                float xp0mm0m = xp0[m1] - xm0[m1];
                float xp0pm0p = xp0[p1] - xm0[p1];
                float x0mp0mm = x0m[p1] - x0m[m1];
                float x0pp0pm = x0p[p1] - x0p[m1];
                float x0pm0mm = x0p[m1] - x0m[m1];
                float x0pp0mp = x0p[p1] - x0m[p1];
                // Differences across the four body diagonals of the window.
                float xpppmmm = xpp[p1] - xmm[m1];
                float xppmmmp = xpp[m1] - xmm[p1];
                float xpmpmpm = xpm[p1] - xmp[m1];
                float xmpppmm = xmp[p1] - xpm[m1];

                float x1 = aa * x00p00m + ab * (x0pp0pm + x0mp0mm + xp0pp0m + xm0pm0m) + bb * (xpppmmm - xppmmmp + xpmpmpm + xmpppmm);
                float x2 = aa * x0p00m0 + ab * (x0pp0mp + x0pm0mm + xpp0pm0 + xmp0mm0) + bb * (xpppmmm + xppmmmp - xpmpmpm + xmpppmm);
                float x3 = aa * xp00m00 + ab * (xp0pm0p + xp0mm0m + xpp0mp0 + xpm0mm0) + bb * (xpppmmm + xppmmmp + xpmpmpm - xmpppmm);
                float y1 = d11 * x1 + d12 * x2 + d13 * x3;
                float y2 = d12 * x1 + d22 * x2 + d23 * x3;
                float y3 = d13 * x1 + d23 * x2 + d33 * x3;

                // Scatter to neighbours that differ in one index.
                float aa00p = aa * y1;
                y00[p1] += aa00p;
                y00[m1] -= aa00p;
                float aa0p0 = aa * y2;
                y0p[i1] += aa0p0;
                y0m[i1] -= aa0p0;
                float aap00 = aa * y3;
                yp0[i1] += aap00;
                ym0[i1] -= aap00;

                // Scatter to neighbours that differ in two indices.
                float ab0pp = ab * (y1 + y2);
                y0p[p1] += ab0pp;
                y0m[m1] -= ab0pp;
                float ab0mp = ab * (y1 - y2);
                y0m[p1] += ab0mp;
                y0p[m1] -= ab0mp;
                float abp0p = ab * (y1 + y3);
                yp0[p1] += abp0p;
                ym0[m1] -= abp0p;
                float abm0p = ab * (y1 - y3);
                ym0[p1] += abm0p;
                yp0[m1] -= abm0p;
                float abpp0 = ab * (y2 + y3);
                ypp[i1] += abpp0;
                ymm[i1] -= abpp0;
                float abmp0 = ab * (y2 - y3);
                ymp[i1] += abmp0;
                ypm[i1] -= abmp0;

                // Scatter to the eight corners of the window.
                float bbppp = bb * (y1 + y2 + y3);
                ypp[p1] += bbppp;
                ymm[m1] -= bbppp;
                float bbmmp = bb * (y1 - y2 - y3);
                ymm[p1] += bbmmp;
                ypp[m1] -= bbmmp;
                float bbpmp = bb * (y1 - y2 + y3);
                ypm[p1] += bbpmp;
                ymp[m1] -= bbpmp;
                float bbmpp = bb * (y1 + y2 - y3);
                ymp[p1] += bbmpp;
                ypm[m1] -= bbmpp;
            }
        }
    }

    /**
     * Variant of the 2D D71 kernel that handles dimension 1 through the
     * helpers gf and gt and dimension 2 inline. For each row i2 it fills a
     * work array from x[i2] with gf, forms x2 for every sample as a
     * C71-weighted sum of differences between rows i2+k and i2-k
     * (k = 1, 2, 3, row indices clamped to the array bounds), multiplies
     * (x1, x2) by the scaled tensor fetched at (i1, i2), adds the y2 part
     * into the same six rows of y, and finally passes the y1 parts and
     * y[i2] to gt.
     * NOTE(review): gf and gt are defined outside this view; presumably
     * they apply the C71 difference along a row and its transpose. Confirm
     * against their definitions.
     */
    private void apply71X(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        float c1 = C71[1];
        float c2 = C71[2];
        float c3 = C71[3];
        int n1 = x[0].length;
        int n2 = x.length;
        float[] di = new float[3];
        float[] g1 = new float[n1];
        for (int i2 = 0; i2 < n2; ++i2) {
            // Neighbouring row indices, clamped to [0, n2-1].
            int rm3 = ArrayMath.max(0, i2 - 3);
            int rp3 = ArrayMath.min(n2 - 1, i2 + 3);
            int rm2 = ArrayMath.max(0, i2 - 2);
            int rp2 = ArrayMath.min(n2 - 1, i2 + 2);
            int rm1 = ArrayMath.max(0, i2 - 1);
            int rp1 = ArrayMath.min(n2 - 1, i2 + 1);
            float[] xm1 = x[rm1];
            float[] xm2 = x[rm2];
            float[] xm3 = x[rm3];
            float[] xp1 = x[rp1];
            float[] xp2 = x[rp2];
            float[] xp3 = x[rp3];
            float[] ym1 = y[rm1];
            float[] ym2 = y[rm2];
            float[] ym3 = y[rm3];
            float[] yp1 = y[rp1];
            float[] yp2 = y[rp2];
            float[] yp3 = y[rp3];
            LocalDiffusionKernel.gf(C71, x[i2], g1);
            for (int i1 = 0; i1 < n1; ++i1) {
                d.getTensor(i1, i2, di);
                float csi = (s != null) ? c * s[i2][i1] : c;
                float d11 = di[0] * csi;
                float d12 = di[1] * csi;
                float d22 = di[2] * csi;
                float x1 = g1[i1];
                float x2 = c1 * (xp1[i1] - xm1[i1]) + c2 * (xp2[i1] - xm2[i1]) + c3 * (xp3[i1] - xm3[i1]);
                float y1 = d11 * x1 + d12 * x2;
                float y2 = d12 * x1 + d22 * x2;
                // Reuse the work array to hold y1 for the call to gt below.
                g1[i1] = y1;
                float c1y2 = c1 * y2;
                yp1[i1] += c1y2;
                ym1[i1] -= c1y2;
                float c2y2 = c2 * y2;
                yp2[i1] += c2y2;
                ym2[i1] -= c2y2;
                float c3y2 = c3 * y2;
                yp3[i1] += c3y2;
                ym3[i1] -= c3y2;
            }
            LocalDiffusionKernel.gt(C71, g1, y[i2]);
        }
    }

    /**
     * Applies the 2D kernel built from the {@code C71} coefficients
     * (centred differences over offsets 1 to 3 on either side of a sample).
     * <p>
     * For each sample the two difference estimates of {@code x} are combined
     * with the scaled tensor supplied by {@code d}, and the result is scattered
     * back through the same differences into {@code y}. Neighbour indices that
     * would fall outside the arrays are clamped to the first or last sample.
     *
     * @param d source of the tensor elements (d11, d12, d22); dereferenced unconditionally.
     * @param c constant factor multiplied into every tensor element.
     * @param s per-sample factors indexed {@code [i2][i1]}, or null to use {@code c} alone.
     * @param x input array indexed {@code [i2][i1]}.
     * @param y output array; contributions are added to its current contents.
     */
    private void apply71(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        final float w1 = C71[1];
        final float w2 = C71[2];
        final float w3 = C71[3];
        final int n1 = x[0].length;
        final int n2 = x.length;
        final int end1 = n1 - 1;
        final int end2 = n2 - 1;
        final float[] tensor = new float[3];
        for (int i2 = 0; i2 < n2; ++i2) {
            // Rows at offsets -3..+3 from i2, clamped to the first/last row.
            final int rb3 = Math.max(i2 - 3, 0);
            final int rb2 = Math.max(i2 - 2, 0);
            final int rb1 = Math.max(i2 - 1, 0);
            final int rf1 = Math.min(i2 + 1, end2);
            final int rf2 = Math.min(i2 + 2, end2);
            final int rf3 = Math.min(i2 + 3, end2);
            final float[] xB3 = x[rb3];
            final float[] xB2 = x[rb2];
            final float[] xB1 = x[rb1];
            final float[] xF3 = x[rf3];
            final float[] xF2 = x[rf2];
            final float[] xF1 = x[rf1];
            final float[] xC = x[i2];
            final float[] yB3 = y[rb3];
            final float[] yB2 = y[rb2];
            final float[] yB1 = y[rb1];
            final float[] yF3 = y[rf3];
            final float[] yF2 = y[rf2];
            final float[] yF1 = y[rf1];
            final float[] yC = y[i2];
            for (int i1 = 0; i1 < n1; ++i1) {
                // Columns at offsets -3..+3 from i1, clamped the same way.
                final int b3 = Math.max(i1 - 3, 0);
                final int b2 = Math.max(i1 - 2, 0);
                final int b1 = Math.max(i1 - 1, 0);
                final int f1 = Math.min(i1 + 1, end1);
                final int f2 = Math.min(i1 + 2, end1);
                final int f3 = Math.min(i1 + 3, end1);

                d.getTensor(i1, i2, tensor);
                final float scale = (s == null) ? c : c * s[i2][i1];
                final float d11 = tensor[0] * scale;
                final float d12 = tensor[1] * scale;
                final float d22 = tensor[2] * scale;

                // Weighted centred differences of x along each dimension.
                final float dx1 = w1 * (xC[f1] - xC[b1]) + w2 * (xC[f2] - xC[b2]) + w3 * (xC[f3] - xC[b3]);
                final float dx2 = w1 * (xF1[i1] - xB1[i1]) + w2 * (xF2[i1] - xB2[i1]) + w3 * (xF3[i1] - xB3[i1]);
                final float q1 = d11 * dx1 + d12 * dx2;
                final float q2 = d12 * dx1 + d22 * dx2;

                // Scatter q1 along dimension 1. The update order is kept as-is because
                // clamped indices can refer to the same element near the edges.
                final float w1q1 = w1 * q1;
                yC[f1] += w1q1;
                yC[b1] -= w1q1;
                final float w2q1 = w2 * q1;
                yC[f2] += w2q1;
                yC[b2] -= w2q1;
                final float w3q1 = w3 * q1;
                yC[f3] += w3q1;
                yC[b3] -= w3q1;

                // Scatter q2 along dimension 2 (neighbouring rows, same column).
                final float w1q2 = w1 * q2;
                yF1[i1] += w1q2;
                yB1[i1] -= w1q2;
                final float w2q2 = w2 * q2;
                yF2[i1] += w2q2;
                yB2[i1] -= w2q2;
                final float w3q2 = w3 * q2;
                yF3[i1] += w3q2;
                yB3[i1] -= w3q2;
            }
        }
    }

    /**
     * Applies the 3D kernel built from the {@code C71} coefficients to slab
     * {@code i3}, using the 2D helpers {@code gf}/{@code gt} for the in-slab
     * (dimension 1 and 2) differences and explicit loops for dimension 3.
     *
     * @param i3 index of the slab of {@code x} being processed.
     * @param d source of the tensor elements (d11, d12, d13, d22, d23, d33); dereferenced unconditionally.
     * @param c constant factor multiplied into every tensor element.
     * @param s per-sample factors indexed {@code [i3][i2][i1]}, or null to use {@code c} alone.
     * @param x input array indexed {@code [i3][i2][i1]}.
     * @param y output array; contributions are added to slabs {@code i3-3 .. i3+3} (clamped).
     */
    private void apply71X(int i3, Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        final float w1 = C71[1];
        final float w2 = C71[2];
        final float w3 = C71[3];
        final int n1 = x[0][0].length;
        final int n2 = x[0].length;
        final int n3 = x.length;
        final float[] tensor = new float[6];

        // Slab indices at offsets -3..+3 from i3, clamped to the first/last slab.
        final int kb3 = ArrayMath.max(0, i3 - 3);
        final int kf3 = ArrayMath.min(n3 - 1, i3 + 3);
        final int kb2 = ArrayMath.max(0, i3 - 2);
        final int kf2 = ArrayMath.min(n3 - 1, i3 + 2);
        final int kb1 = ArrayMath.max(0, i3 - 1);
        final int kf1 = ArrayMath.min(n3 - 1, i3 + 1);

        // In-slab differences of x; these buffers are overwritten below with the
        // tensor-weighted values before being passed to gt.
        final float[][] g1 = new float[n2][n1];
        final float[][] g2 = new float[n2][n1];
        gf(C71, x[i3], g1, g2);

        for (int i2 = 0; i2 < n2; ++i2) {
            final float[] xB1 = x[kb1][i2];
            final float[] xB2 = x[kb2][i2];
            final float[] xB3 = x[kb3][i2];
            final float[] xF1 = x[kf1][i2];
            final float[] xF2 = x[kf2][i2];
            final float[] xF3 = x[kf3][i2];
            final float[] yB1 = y[kb1][i2];
            final float[] yB2 = y[kb2][i2];
            final float[] yB3 = y[kb3][i2];
            final float[] yF1 = y[kf1][i2];
            final float[] yF2 = y[kf2][i2];
            final float[] yF3 = y[kf3][i2];
            final float[] g1Row = g1[i2];
            final float[] g2Row = g2[i2];
            for (int i1 = 0; i1 < n1; ++i1) {
                d.getTensor(i1, i2, i3, tensor);
                final float scale = (s == null) ? c : c * s[i3][i2][i1];
                final float d11 = tensor[0] * scale;
                final float d12 = tensor[1] * scale;
                final float d13 = tensor[2] * scale;
                final float d22 = tensor[3] * scale;
                final float d23 = tensor[4] * scale;
                final float d33 = tensor[5] * scale;

                final float dx1 = g1Row[i1];
                final float dx2 = g2Row[i1];
                final float dx3 = w1 * (xF1[i1] - xB1[i1]) + w2 * (xF2[i1] - xB2[i1]) + w3 * (xF3[i1] - xB3[i1]);

                final float q1 = d11 * dx1 + d12 * dx2 + d13 * dx3;
                final float q2 = d12 * dx1 + d22 * dx2 + d23 * dx3;
                final float q3 = d13 * dx1 + d23 * dx2 + d33 * dx3;

                // Components 1 and 2 are scattered later by gt; component 3 is scattered here.
                g1Row[i1] = q1;
                g2Row[i1] = q2;

                final float w1q3 = w1 * q3;
                yF1[i1] += w1q3;
                yB1[i1] -= w1q3;
                final float w2q3 = w2 * q3;
                yF2[i1] += w2q3;
                yB2[i1] -= w2q3;
                final float w3q3 = w3 * q3;
                yF3[i1] += w3q3;
                yB3[i1] -= w3q3;
            }
        }
        gt(C71, g1, g2, y[i3]);
    }

    /**
     * Applies the 3D kernel built from the {@code C71} coefficients to slab
     * {@code i3} with all three difference directions written out explicitly.
     * <p>
     * Neighbour indices in every dimension are clamped to the first or last
     * valid index, so samples near an edge reuse the edge sample.
     *
     * @param i3 index of the slab of {@code x} being processed.
     * @param d source of the tensor elements (d11, d12, d13, d22, d23, d33); dereferenced unconditionally.
     * @param c constant factor multiplied into every tensor element.
     * @param s per-sample factors indexed {@code [i3][i2][i1]}, or null to use {@code c} alone.
     * @param x input array indexed {@code [i3][i2][i1]}.
     * @param y output array; contributions are added to its current contents.
     */
    private void apply71(int i3, Tensors3 d, float c, float[][][] s, float[][][] x, float[][][] y) {
        final float w1 = C71[1];
        final float w2 = C71[2];
        final float w3 = C71[3];
        final int n1 = x[0][0].length;
        final int n2 = x[0].length;
        final int n3 = x.length;
        final int end1 = n1 - 1;
        final int end2 = n2 - 1;
        final int end3 = n3 - 1;
        final float[] tensor = new float[6];

        // Slab indices at offsets -3..+3 from i3, clamped to the valid range.
        final int kb3 = Math.max(i3 - 3, 0);
        final int kb2 = Math.max(i3 - 2, 0);
        final int kb1 = Math.max(i3 - 1, 0);
        final int kf1 = Math.min(i3 + 1, end3);
        final int kf2 = Math.min(i3 + 2, end3);
        final int kf3 = Math.min(i3 + 3, end3);

        for (int i2 = 0; i2 < n2; ++i2) {
            // Row indices at offsets -3..+3 from i2, clamped to the valid range.
            final int rb3 = Math.max(i2 - 3, 0);
            final int rb2 = Math.max(i2 - 2, 0);
            final int rb1 = Math.max(i2 - 1, 0);
            final int rf1 = Math.min(i2 + 1, end2);
            final int rf2 = Math.min(i2 + 2, end2);
            final int rf3 = Math.min(i2 + 3, end2);

            // Centre trace plus its neighbours within slab i3 (dimension 2).
            final float[] xC = x[i3][i2];
            final float[] yC = y[i3][i2];
            final float[] xRowB3 = x[i3][rb3];
            final float[] yRowB3 = y[i3][rb3];
            final float[] xRowB2 = x[i3][rb2];
            final float[] yRowB2 = y[i3][rb2];
            final float[] xRowB1 = x[i3][rb1];
            final float[] yRowB1 = y[i3][rb1];
            final float[] xRowF1 = x[i3][rf1];
            final float[] yRowF1 = y[i3][rf1];
            final float[] xRowF2 = x[i3][rf2];
            final float[] yRowF2 = y[i3][rf2];
            final float[] xRowF3 = x[i3][rf3];
            final float[] yRowF3 = y[i3][rf3];

            // Traces at the same i2 in neighbouring slabs (dimension 3).
            final float[] xSlabB3 = x[kb3][i2];
            final float[] ySlabB3 = y[kb3][i2];
            final float[] xSlabB2 = x[kb2][i2];
            final float[] ySlabB2 = y[kb2][i2];
            final float[] xSlabB1 = x[kb1][i2];
            final float[] ySlabB1 = y[kb1][i2];
            final float[] xSlabF1 = x[kf1][i2];
            final float[] ySlabF1 = y[kf1][i2];
            final float[] xSlabF2 = x[kf2][i2];
            final float[] ySlabF2 = y[kf2][i2];
            final float[] xSlabF3 = x[kf3][i2];
            final float[] ySlabF3 = y[kf3][i2];

            for (int i1 = 0; i1 < n1; ++i1) {
                // Sample indices at offsets -3..+3 from i1, clamped to the valid range.
                final int b3 = Math.max(i1 - 3, 0);
                final int b2 = Math.max(i1 - 2, 0);
                final int b1 = Math.max(i1 - 1, 0);
                final int f1 = Math.min(i1 + 1, end1);
                final int f2 = Math.min(i1 + 2, end1);
                final int f3 = Math.min(i1 + 3, end1);

                d.getTensor(i1, i2, i3, tensor);
                final float scale = (s == null) ? c : c * s[i3][i2][i1];
                final float d11 = tensor[0] * scale;
                final float d12 = tensor[1] * scale;
                final float d13 = tensor[2] * scale;
                final float d22 = tensor[3] * scale;
                final float d23 = tensor[4] * scale;
                final float d33 = tensor[5] * scale;

                // Weighted centred differences of x along each dimension.
                final float dx1 = w1 * (xC[f1] - xC[b1]) + w2 * (xC[f2] - xC[b2]) + w3 * (xC[f3] - xC[b3]);
                final float dx2 = w1 * (xRowF1[i1] - xRowB1[i1]) + w2 * (xRowF2[i1] - xRowB2[i1]) + w3 * (xRowF3[i1] - xRowB3[i1]);
                final float dx3 = w1 * (xSlabF1[i1] - xSlabB1[i1]) + w2 * (xSlabF2[i1] - xSlabB2[i1]) + w3 * (xSlabF3[i1] - xSlabB3[i1]);

                final float q1 = d11 * dx1 + d12 * dx2 + d13 * dx3;
                final float q2 = d12 * dx1 + d22 * dx2 + d23 * dx3;
                final float q3 = d13 * dx1 + d23 * dx2 + d33 * dx3;

                // Scatter q1 along dimension 1. The update order is kept as-is because
                // clamped indices can refer to the same element near the edges.
                final float w1q1 = w1 * q1;
                yC[f1] += w1q1;
                yC[b1] -= w1q1;
                final float w2q1 = w2 * q1;
                yC[f2] += w2q1;
                yC[b2] -= w2q1;
                final float w3q1 = w3 * q1;
                yC[f3] += w3q1;
                yC[b3] -= w3q1;

                // Scatter q2 along dimension 2.
                final float w1q2 = w1 * q2;
                yRowF1[i1] += w1q2;
                yRowB1[i1] -= w1q2;
                final float w2q2 = w2 * q2;
                yRowF2[i1] += w2q2;
                yRowB2[i1] -= w2q2;
                final float w3q2 = w3 * q2;
                yRowF3[i1] += w3q2;
                yRowB3[i1] -= w3q2;

                // Scatter q3 along dimension 3.
                final float w1q3 = w1 * q3;
                ySlabF1[i1] += w1q3;
                ySlabB1[i1] -= w1q3;
                final float w2q3 = w2 * q3;
                ySlabF2[i1] += w2q3;
                ySlabB2[i1] -= w2q3;
                final float w3q3 = w3 * q3;
                ySlabF3[i1] += w3q3;
                ySlabB3[i1] -= w3q3;
            }
        }
    }

    /**
     * Applies the 2D kernel built from the {@code C91} coefficients
     * (centred differences over offsets 1 to 4 on either side of a sample).
     * <p>
     * Structure mirrors the 2D {@code apply71}: differences of {@code x} are
     * combined with the scaled tensor from {@code d} and scattered back into
     * {@code y}; out-of-range neighbour indices are clamped to the array ends.
     *
     * @param d source of the tensor elements (d11, d12, d22); dereferenced unconditionally.
     * @param c constant factor multiplied into every tensor element.
     * @param s per-sample factors indexed {@code [i2][i1]}, or null to use {@code c} alone.
     * @param x input array indexed {@code [i2][i1]}.
     * @param y output array; contributions are added to its current contents.
     */
    private void apply91(Tensors2 d, float c, float[][] s, float[][] x, float[][] y) {
        final float w1 = C91[1];
        final float w2 = C91[2];
        final float w3 = C91[3];
        final float w4 = C91[4];
        final int n1 = x[0].length;
        final int n2 = x.length;
        final int end1 = n1 - 1;
        final int end2 = n2 - 1;
        final float[] tensor = new float[3];
        for (int i2 = 0; i2 < n2; ++i2) {
            // Rows at offsets -4..+4 from i2, clamped to the first/last row.
            final int rb4 = Math.max(i2 - 4, 0);
            final int rb3 = Math.max(i2 - 3, 0);
            final int rb2 = Math.max(i2 - 2, 0);
            final int rb1 = Math.max(i2 - 1, 0);
            final int rf1 = Math.min(i2 + 1, end2);
            final int rf2 = Math.min(i2 + 2, end2);
            final int rf3 = Math.min(i2 + 3, end2);
            final int rf4 = Math.min(i2 + 4, end2);
            final float[] xB4 = x[rb4];
            final float[] xB3 = x[rb3];
            final float[] xB2 = x[rb2];
            final float[] xB1 = x[rb1];
            final float[] xF4 = x[rf4];
            final float[] xF3 = x[rf3];
            final float[] xF2 = x[rf2];
            final float[] xF1 = x[rf1];
            final float[] xC = x[i2];
            final float[] yB4 = y[rb4];
            final float[] yB3 = y[rb3];
            final float[] yB2 = y[rb2];
            final float[] yB1 = y[rb1];
            final float[] yF4 = y[rf4];
            final float[] yF3 = y[rf3];
            final float[] yF2 = y[rf2];
            final float[] yF1 = y[rf1];
            final float[] yC = y[i2];
            for (int i1 = 0; i1 < n1; ++i1) {
                // Columns at offsets -4..+4 from i1, clamped the same way.
                final int b4 = Math.max(i1 - 4, 0);
                final int b3 = Math.max(i1 - 3, 0);
                final int b2 = Math.max(i1 - 2, 0);
                final int b1 = Math.max(i1 - 1, 0);
                final int f1 = Math.min(i1 + 1, end1);
                final int f2 = Math.min(i1 + 2, end1);
                final int f3 = Math.min(i1 + 3, end1);
                final int f4 = Math.min(i1 + 4, end1);

                d.getTensor(i1, i2, tensor);
                final float scale = (s == null) ? c : c * s[i2][i1];
                final float d11 = tensor[0] * scale;
                final float d12 = tensor[1] * scale;
                final float d22 = tensor[2] * scale;

                // Weighted centred differences of x along each dimension.
                final float dx1 = w1 * (xC[f1] - xC[b1]) + w2 * (xC[f2] - xC[b2]) + w3 * (xC[f3] - xC[b3]) + w4 * (xC[f4] - xC[b4]);
                final float dx2 = w1 * (xF1[i1] - xB1[i1]) + w2 * (xF2[i1] - xB2[i1]) + w3 * (xF3[i1] - xB3[i1]) + w4 * (xF4[i1] - xB4[i1]);
                final float q1 = d11 * dx1 + d12 * dx2;
                final float q2 = d12 * dx1 + d22 * dx2;

                // Scatter q1 along dimension 1. The update order is kept as-is because
                // clamped indices can refer to the same element near the edges.
                final float w1q1 = w1 * q1;
                yC[f1] += w1q1;
                yC[b1] -= w1q1;
                final float w2q1 = w2 * q1;
                yC[f2] += w2q1;
                yC[b2] -= w2q1;
                final float w3q1 = w3 * q1;
                yC[f3] += w3q1;
                yC[b3] -= w3q1;
                final float w4q1 = w4 * q1;
                yC[f4] += w4q1;
                yC[b4] -= w4q1;

                // Scatter q2 along dimension 2 (neighbouring rows, same column).
                final float w1q2 = w1 * q2;
                yF1[i1] += w1q2;
                yB1[i1] -= w1q2;
                final float w2q2 = w2 * q2;
                yF2[i1] += w2q2;
                yB2[i1] -= w2q2;
                final float w3q2 = w3 * q2;
                yF3[i1] += w3q2;
                yB3[i1] -= w3q2;
                final float w4q2 = w4 * q2;
                yF4[i1] += w4q2;
                yB4[i1] -= w4q2;
            }
        }
    }

    /**
     * Computes weighted centred differences of a 1D array:
     * {@code y[i] = sum over k=1..nc of c[k] * (x[min(i+k, n-1)] - x[max(i-k, 0)])},
     * where {@code nc = c.length - 1}. Every element of {@code y} in
     * {@code [0, x.length)} is overwritten.
     * <p>
     * The work is split into a leading edge, an interior where no clamping is
     * needed, and a trailing edge. The interior has a hand-unrolled variant
     * for three coefficients.
     *
     * @param c coefficients; {@code c[0]} is not used.
     * @param x input samples.
     * @param y output samples; must hold at least {@code x.length} elements.
     */
    private static void gf(float[] c, float[] x, float[] y) {
        final int nc = c.length - 1;
        final int n1 = x.length;
        final int last = n1 - 1;
        final int tailStart = n1 - nc;

        // Leading edge: indices below zero are clamped to 0 (and, for short
        // arrays, indices past the end are clamped to the last sample).
        final int headEnd = ArrayMath.min(nc, tailStart);
        for (int i = 0; i < headEnd; ++i) {
            float sum = 0.0f;
            for (int k = 1; k <= nc; ++k) {
                final float ck = c[k];
                final int lo = (i - k < 0) ? 0 : i - k;
                final int hi = (i + k > last) ? last : i + k;
                sum += ck * (x[hi] - x[lo]);
            }
            y[i] = sum;
        }

        // Interior: all neighbours are in range.
        if (nc == 3 && n1 > 6) {
            final float c1 = c[1];
            final float c2 = c[2];
            final float c3 = c[3];
            // Sliding window over x; primed so that the first shift centres it on index 3.
            float b2 = x[0];
            float b1 = x[1];
            float mid = x[2];
            float f1 = x[3];
            float f2 = x[4];
            float f3 = x[5];
            for (int i = 3; i < tailStart; ++i) {
                final float b3 = b2;
                b2 = b1;
                b1 = mid;
                mid = f1;
                f1 = f2;
                f2 = f3;
                f3 = x[i + 3];
                y[i] = c1 * (f1 - b1) + c2 * (f2 - b2) + c3 * (f3 - b3);
            }
        } else {
            for (int i = nc; i < tailStart; ++i) {
                float sum = 0.0f;
                for (int k = 1; k <= nc; ++k) {
                    sum += c[k] * (x[i + k] - x[i - k]);
                }
                y[i] = sum;
            }
        }

        // Trailing edge: same clamped evaluation as the leading edge.
        for (int i = ArrayMath.max(tailStart, 0); i < n1; ++i) {
            float sum = 0.0f;
            for (int k = 1; k <= nc; ++k) {
                final float ck = c[k];
                final int lo = (i - k < 0) ? 0 : i - k;
                final int hi = (i + k > last) ? last : i + k;
                sum += ck * (x[hi] - x[lo]);
            }
            y[i] = sum;
        }
    }

    /**
     * Accumulates into {@code y} the transpose of the clamped centred
     * difference computed by the 1D {@code gf}: each input sample {@code x[i]}
     * adds {@code c[k]*x[i]} to {@code y[min(i+k, n-1)]} and subtracts it from
     * {@code y[max(i-k, 0)]}.
     * <p>
     * Output indices below {@code nc} are handled by the first loop, interior
     * indices by a gather loop (hand-unrolled for three coefficients), and the
     * remaining high indices by the last loop.
     *
     * @param c coefficients; {@code c[0]} is not used.
     * @param x input samples.
     * @param y output samples, updated in place; must hold at least {@code x.length} elements.
     */
    private static void gt(float[] c, float[] x, float[] y) {
        final int nc = c.length - 1;
        final int n1 = x.length;
        final int last = n1 - 1;
        final int interiorEnd = n1 - nc;

        // Contributions landing on output indices [0, nc).
        final int headEnd = ArrayMath.min(2 * nc, n1);
        for (int i = 0; i < headEnd; ++i) {
            final float xi = x[i];
            for (int k = 1; k <= nc; ++k) {
                final float ck = c[k];
                final int lo = (i - k < 0) ? 0 : i - k;
                final int hi = (i + k > last) ? last : i + k;
                if (lo < nc) {
                    y[lo] -= ck * xi;
                }
                if (hi < nc) {
                    y[hi] += ck * xi;
                }
            }
        }

        // Interior output indices [nc, n1 - nc): gather form, no clamping needed.
        if (nc == 3 && n1 > 6) {
            final float c1 = c[1];
            final float c2 = c[2];
            final float c3 = c[3];
            // Sliding window over x; primed so that the first shift centres it on index 3.
            float b2 = x[0];
            float b1 = x[1];
            float mid = x[2];
            float f1 = x[3];
            float f2 = x[4];
            float f3 = x[5];
            for (int i = 3; i < interiorEnd; ++i) {
                final float b3 = b2;
                b2 = b1;
                b1 = mid;
                mid = f1;
                f1 = f2;
                f2 = f3;
                f3 = x[i + 3];
                y[i] += c1 * (b1 - f1) + c2 * (b2 - f2) + c3 * (b3 - f3);
            }
        } else {
            for (int i = nc; i < interiorEnd; ++i) {
                float acc = y[i];
                for (int k = 1; k <= nc; ++k) {
                    acc += c[k] * (x[i - k] - x[i + k]);
                }
                y[i] = acc;
            }
        }

        // Contributions landing on the remaining high output indices.
        final int tailFirst = ArrayMath.max(interiorEnd, nc);
        for (int i = ArrayMath.max(n1 - 2 * nc, 0); i < n1; ++i) {
            final float xi = x[i];
            for (int k = 1; k <= nc; ++k) {
                final float ck = c[k];
                final int lo = (i - k < 0) ? 0 : i - k;
                final int hi = (i + k > last) ? last : i + k;
                if (lo >= tailFirst) {
                    y[lo] -= ck * xi;
                }
                if (hi >= tailFirst) {
                    y[hi] += ck * xi;
                }
            }
        }
    }

    /**
     * Runs the 1D {@code gf} on every row of {@code x}, writing row
     * {@code i2} of the result into {@code g1[i2]}.
     *
     * @param c coefficients passed through to the 1D {@code gf}.
     * @param x input array indexed {@code [i2][i1]}.
     * @param g1 output array with at least as many rows as {@code x}.
     */
    private static void gf1(float[] c, float[][] x, float[][] g1) {
        final int rows = x.length;
        for (int row = 0; row < rows; ++row) {
            final float[] in = x[row];
            final float[] out = g1[row];
            gf(c, in, out);
        }
    }

    /**
     * Computes weighted centred differences of {@code x} across rows
     * (the slow dimension):
     * {@code g2[i2][i1] = sum over k=1..nc of
     * c[k] * (x[min(i2+k, n2-1)][i1] - x[max(i2-k, 0)][i1])},
     * where {@code nc = c.length - 1}. Every element of {@code g2} is overwritten.
     * <p>
     * The three-coefficient case has a hand-unrolled path. That path now reads
     * its weights from {@code c}; it previously read them from {@code C71}
     * regardless of the argument, so any other three-coefficient stencil was
     * silently replaced by C71. Results are unchanged when {@code c} is
     * {@code C71}.
     *
     * @param c coefficients; {@code c[0]} is not used.
     * @param x input array indexed {@code [i2][i1]}; must have at least one row.
     * @param g2 output array with the same dimensions as {@code x}.
     */
    private static void gf2(float[] c, float[][] x, float[][] g2) {
        int nc = c.length - 1;
        int n1 = x[0].length;
        int n2 = x.length;
        int n2m1 = n2 - 1;
        if (nc == 3) {
            // Use the caller's coefficients, consistent with the 1D gf and with gt2.
            float c1 = c[1];
            float c2 = c[2];
            float c3 = c[3];
            for (int i2 = 0; i2 < n2; ++i2) {
                // Neighbouring rows, clamped to the first/last row.
                float[] xm3 = x[Math.max(i2 - 3, 0)];
                float[] xm2 = x[Math.max(i2 - 2, 0)];
                float[] xm1 = x[Math.max(i2 - 1, 0)];
                float[] xp1 = x[Math.min(i2 + 1, n2m1)];
                float[] xp2 = x[Math.min(i2 + 2, n2m1)];
                float[] xp3 = x[Math.min(i2 + 3, n2m1)];
                float[] g2i = g2[i2];
                for (int i1 = 0; i1 < n1; ++i1) {
                    g2i[i1] = c1 * (xp1[i1] - xm1[i1]) + c2 * (xp2[i1] - xm2[i1]) + c3 * (xp3[i1] - xm3[i1]);
                }
            }
        } else {
            for (int i2 = 0; i2 < n2; ++i2) {
                float[] g2i = g2[i2];
                // Clear the row first; the terms below are accumulated one coefficient at a time.
                ArrayMath.zero(g2i);
                for (int ic = 1; ic <= nc; ++ic) {
                    float ci = c[ic];
                    float[] xm = x[Math.max(i2 - ic, 0)];
                    float[] xp = x[Math.min(i2 + ic, n2m1)];
                    for (int i1 = 0; i1 < n1; ++i1) {
                        g2i[i1] += ci * (xp[i1] - xm[i1]);
                    }
                }
            }
        }
    }

    /**
     * Runs the 1D {@code gt} on every row, accumulating the contribution of
     * {@code g1[i2]} into {@code x[i2]}.
     *
     * @param c coefficients passed through to the 1D {@code gt}.
     * @param g1 input array indexed {@code [i2][i1]}.
     * @param x output array, updated in place; its row count drives the loop.
     */
    private static void gt1(float[] c, float[][] g1, float[][] x) {
        final int rows = x.length;
        for (int row = 0; row < rows; ++row) {
            final float[] in = g1[row];
            final float[] out = x[row];
            gt(c, in, out);
        }
    }

    /**
     * Accumulates into {@code x} the transpose of the across-row difference
     * computed by {@code gf2}: each input row {@code g2[i2]} adds
     * {@code c[k]*g2[i2]} to row {@code min(i2+k, n2-1)} of {@code x} and
     * subtracts it from row {@code max(i2-k, 0)}.
     * <p>
     * Output rows below {@code nc} are handled by the first loop, interior
     * rows by a gather loop, and the remaining high rows by the last loop.
     *
     * @param c coefficients; {@code c[0]} is not used.
     * @param g2 input array indexed {@code [i2][i1]}.
     * @param x output array, updated in place; must have at least one row.
     */
    private static void gt2(float[] c, float[][] g2, float[][] x) {
        final int nc = c.length - 1;
        final int n1 = x[0].length;
        final int n2 = x.length;
        final int lastRow = n2 - 1;

        // Contributions landing on output rows [0, nc).
        final int headEnd = ArrayMath.min(2 * nc, n2);
        for (int row = 0; row < headEnd; ++row) {
            final float[] src = g2[row];
            for (int k = 1; k <= nc; ++k) {
                final float ck = c[k];
                final int lo = (row - k < 0) ? 0 : row - k;
                final int hi = (row + k > lastRow) ? lastRow : row + k;
                if (lo < nc) {
                    final float[] dst = x[lo];
                    for (int i1 = 0; i1 < n1; ++i1) {
                        dst[i1] -= ck * src[i1];
                    }
                }
                if (hi < nc) {
                    final float[] dst = x[hi];
                    for (int i1 = 0; i1 < n1; ++i1) {
                        dst[i1] += ck * src[i1];
                    }
                }
            }
        }

        // Interior output rows [nc, n2 - nc): gather form, no clamping needed.
        // NOTE(review): this guard tests n1 although the loop below runs over n2;
        // possibly n2 was intended. Both branches add the same terms (only the
        // grouping of the float additions differs), so the guard is kept as found.
        if (nc == 3 && n1 > 6) {
            final float c1 = c[1];
            final float c2 = c[2];
            final float c3 = c[3];
            for (int row = 3; row < n2 - 3; ++row) {
                final float[] gB3 = g2[row - 3];
                final float[] gB2 = g2[row - 2];
                final float[] gB1 = g2[row - 1];
                final float[] gF1 = g2[row + 1];
                final float[] gF2 = g2[row + 2];
                final float[] gF3 = g2[row + 3];
                final float[] dst = x[row];
                for (int i1 = 0; i1 < n1; ++i1) {
                    dst[i1] += c1 * (gB1[i1] - gF1[i1]) + c2 * (gB2[i1] - gF2[i1]) + c3 * (gB3[i1] - gF3[i1]);
                }
            }
        } else {
            for (int row = nc; row < n2 - nc; ++row) {
                final float[] dst = x[row];
                for (int k = 1; k <= nc; ++k) {
                    final float ck = c[k];
                    final float[] gLo = g2[row - k];
                    final float[] gHi = g2[row + k];
                    for (int i1 = 0; i1 < n1; ++i1) {
                        dst[i1] += ck * (gLo[i1] - gHi[i1]);
                    }
                }
            }
        }

        // Contributions landing on the remaining high output rows.
        final int tailFirst = ArrayMath.max(n2 - nc, nc);
        for (int row = ArrayMath.max(n2 - 2 * nc, 0); row < n2; ++row) {
            final float[] src = g2[row];
            for (int k = 1; k <= nc; ++k) {
                final float ck = c[k];
                final int lo = (row - k < 0) ? 0 : row - k;
                final int hi = (row + k > lastRow) ? lastRow : row + k;
                if (lo >= tailFirst) {
                    final float[] dst = x[lo];
                    for (int i1 = 0; i1 < n1; ++i1) {
                        dst[i1] -= ck * src[i1];
                    }
                }
                if (hi >= tailFirst) {
                    final float[] dst = x[hi];
                    for (int i1 = 0; i1 < n1; ++i1) {
                        dst[i1] += ck * src[i1];
                    }
                }
            }
        }
    }

    /**
     * Computes both difference components of a 2D array: {@code g1} via
     * {@code gf1} (within rows) and {@code g2} via {@code gf2} (across rows).
     *
     * @param c coefficients passed to both helpers.
     * @param x input array indexed {@code [i2][i1]}.
     * @param g1 output for the within-row differences.
     * @param g2 output for the across-row differences.
     */
    private static void gf(float[] c, float[][] x, float[][] g1, float[][] g2) {
        gf1(c, x, g1);
        gf2(c, x, g2);
    }

    /**
     * Accumulates into {@code x} the transposed differences of both
     * components: first {@code g2} via {@code gt2}, then {@code g1} via
     * {@code gt1}. The call order is kept because it determines the order of
     * the floating-point additions into {@code x}.
     *
     * @param c coefficients passed to both helpers.
     * @param g1 within-row component.
     * @param g2 across-row component.
     * @param x output array, updated in place.
     */
    private static void gt(float[] c, float[][] g1, float[][] g2, float[][] x) {
        gt2(c, g2, x);
        gt1(c, g1, x);
    }

    /**
     * Manual check of the 1D {@code gf}/{@code gt} pair on random arrays of
     * length 21. Dumps both results and prints the inner products
     * {@code y.gf(x)} and {@code x.gt(y)}, which should agree (up to rounding)
     * if {@code gt} is the transpose of {@code gf}.
     */
    private static void testGrad1() {
        final int count = 21;
        final float[] x = ArrayMath.randfloat(count);
        final float[] y = ArrayMath.randfloat(count);
        final float[] forward = ArrayMath.zerofloat(count);
        final float[] transposed = ArrayMath.zerofloat(count);
        gf(C71, x, forward);
        gt(C71, y, transposed);
        ArrayMath.dump(forward);
        ArrayMath.dump(transposed);
        final float[] yTimesForward = ArrayMath.mul(y, forward);
        final float ygx = ArrayMath.sum(yTimesForward);
        final float[] xTimesTransposed = ArrayMath.mul(x, transposed);
        final float xgy = ArrayMath.sum(xTimesTransposed);
        trace("ygx=" + ygx);
        trace("xgy=" + xgy);
    }

    /**
     * Manual check of the 2D {@code gf}/{@code gt} pair on random 11-by-21
     * arrays. Prints the inner products {@code y1.g1 + y2.g2} and
     * {@code x.gt(y1, y2)}, which should agree (up to rounding) if the 2D
     * {@code gt} is the transpose of the 2D {@code gf}.
     */
    private static void testGrad2() {
        final int n1 = 11;
        final int n2 = 21;
        final float[][] x = ArrayMath.randfloat(n1, n2);
        final float[][] y1 = ArrayMath.randfloat(n1, n2);
        final float[][] y2 = ArrayMath.randfloat(n1, n2);
        final float[][] transposed = ArrayMath.zerofloat(n1, n2);
        final float[][] forward1 = ArrayMath.zerofloat(n1, n2);
        final float[][] forward2 = ArrayMath.zerofloat(n1, n2);
        gf(C71, x, forward1, forward2);
        gt(C71, y1, y2, transposed);
        final float[][] part1 = ArrayMath.mul(y1, forward1);
        final float[][] part2 = ArrayMath.mul(y2, forward2);
        final float ygx = ArrayMath.sum(ArrayMath.add(part1, part2));
        final float xgy = ArrayMath.sum(ArrayMath.mul(x, transposed));
        trace("ygx=" + ygx);
        trace("xgy=" + xgy);
    }

    /**
     * Command-line entry point. The body is empty, so running this class does
     * nothing; the private checks {@code testGrad1} and {@code testGrad2} are
     * not invoked from here.
     *
     * @param args command-line arguments (unused).
     */
    public static void main(String[] args) {
    }

    /**
     * Identifies which finite-difference stencil a kernel instance uses.
     * The no-argument constructor of the enclosing class selects {@link #D22}.
     * <p>
     * NOTE(review): the per-constant meanings are not visible in this part of
     * the file; D71 and D91 presumably correspond to the apply71/apply91
     * methods and the C71/C91 coefficient tables. Confirm against the
     * dispatch code.
     */
    public static enum Stencil {
        D21,
        /** Default stencil used by the no-argument constructor. */
        D22,
        D24,
        D33,
        D71,
        D91
    }
}

