/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.matrix.data;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockCSR;
import org.apache.sysds.runtime.data.SparseBlockFactory;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
import org.apache.sysds.runtime.data.SparseRow;
import org.apache.sysds.runtime.data.SparseRowVector;
import org.apache.sysds.runtime.functionobjects.And;
import org.apache.sysds.runtime.functionobjects.Builtin;
import org.apache.sysds.runtime.functionobjects.Divide;
import org.apache.sysds.runtime.functionobjects.Equals;
import org.apache.sysds.runtime.functionobjects.GreaterThan;
import org.apache.sysds.runtime.functionobjects.GreaterThanEquals;
import org.apache.sysds.runtime.functionobjects.LessThan;
import org.apache.sysds.runtime.functionobjects.LessThanEquals;
import org.apache.sysds.runtime.functionobjects.Minus;
import org.apache.sysds.runtime.functionobjects.MinusMultiply;
import org.apache.sysds.runtime.functionobjects.Multiply;
import org.apache.sysds.runtime.functionobjects.Multiply2;
import org.apache.sysds.runtime.functionobjects.NotEquals;
import org.apache.sysds.runtime.functionobjects.Plus;
import org.apache.sysds.runtime.functionobjects.PlusMultiply;
import org.apache.sysds.runtime.functionobjects.Power2;
import org.apache.sysds.runtime.functionobjects.ValueFunction;
import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
import org.apache.sysds.runtime.matrix.data.LibMatrixOuterAgg;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
import org.apache.sysds.runtime.util.CommonThreadPool;
import org.apache.sysds.runtime.util.DataConverter;
import org.apache.sysds.runtime.util.SortUtils;
import org.apache.sysds.runtime.util.UtilFunctions;

public class LibMatrixBincell {
    /** Logger of this class. */
    private static final Log LOG = LogFactory.getLog((String)LibMatrixBincell.class.getName());
    /** Cell-count threshold value (16384); the multi-threaded entry points compare block lengths against this value. */
    private static final long PAR_NUMCELL_THRESHOLD2 = 16384L;

    /** Private constructor: the class cannot be instantiated from outside. */
    private LibMatrixBincell() {
    }

    /**
     * Applies the unary operator {@code op} to every cell of {@code m1}.
     *
     * <p>A dense, non-empty input with more than 16384 cells and an operator configured for more than
     * one thread is processed by tasks over consecutive row ranges on a thread pool. Every other input
     * goes through the single-threaded sparse-safe or dense code path.
     *
     * @param m1 input block
     * @param ret output block; replaced by {@code m1} when the operator is flagged in-place (on the
     *        single-threaded path only if {@code m1} is not in sparse format)
     * @param op unary operator; also supplies the thread count and the in-place flag
     * @return the block that holds the result (either {@code ret} or {@code m1})
     * @throws DMLRuntimeException if a parallel task fails or the calling thread is interrupted
     */
    public static MatrixBlock uncellOp(MatrixBlock m1, MatrixBlock ret, UnaryOperator op) {
        if (!m1.sparse && !m1.isEmptyBlock(false) && op.getNumThreads() > 1 && m1.getLength() > 16384L) {
            if (!op.isInplace() || m1.isEmpty()) {
                ret.allocateDenseBlock(false);
            } else {
                ret = m1;
            }
            int k = op.getNumThreads();
            DenseBlock a = m1.getDenseBlock();
            DenseBlock c = ret.getDenseBlock();
            try {
                ExecutorService pool = CommonThreadPool.get(k);
                try {
                    ArrayList<UncellTask> tasks = new ArrayList<UncellTask>();
                    ArrayList<Integer> blklens = UtilFunctions.getBalancedBlockSizesDefault(ret.rlen, k, false);
                    int lb = 0;
                    for (int i = 0; i < blklens.size(); ++i) {
                        int len = blklens.get(i);
                        tasks.add(new UncellTask(a, c, op, lb, lb + len));
                        lb += len;
                    }
                    // each task returns the number of non-zeros of its row range
                    List<Future<Long>> taskret = pool.invokeAll(tasks);
                    ret.nonZeros = 0L;
                    for (Future<Long> task : taskret) {
                        ret.nonZeros += task.get();
                    }
                }
                finally {
                    // release the pool even if a task failed or the wait was interrupted
                    pool.shutdown();
                }
            }
            catch (InterruptedException | ExecutionException ex) {
                throw new DMLRuntimeException(ex);
            }
        } else {
            if (op.isInplace() && !m1.isInSparseFormat()) {
                ret = m1;
            }
            if (op.sparseSafe) {
                LibMatrixBincell.sparseUnaryOperations(m1, ret, op);
            } else {
                LibMatrixBincell.denseUnaryOperations(m1, ret, op);
            }
        }
        return ret;
    }

    /**
     * Single-threaded matrix-scalar cell-wise operation.
     *
     * <p>For a sparse-safe operator the output must use the same representation as the input; for a
     * non-sparse-safe operator the output must not be in sparse format.
     *
     * @param m1 input block
     * @param ret output block
     * @param op scalar operator
     * @throws DMLRuntimeException if the output representation violates the rule above
     */
    public static void bincellOp(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) {
        final boolean wrongFormat = op.sparseSafe
            ? m1.isInSparseFormat() != ret.isInSparseFormat()
            : ret.isInSparseFormat();
        if (wrongFormat) {
            throw new DMLRuntimeException("Wrong output representation for safe=" + op.sparseSafe + ": " + m1.isInSparseFormat() + ", " + ret.isInSparseFormat());
        }

        if (!op.sparseSafe) {
            LibMatrixBincell.unsafeBinaryScalar(m1, ret, op);
        } else {
            LibMatrixBincell.safeBinaryScalar(m1, ret, op, 0, m1.rlen);
        }

        // let an empty result re-examine its representation
        if (ret.isEmptyBlock(false)) {
            ret.examSparsity();
        }
    }

    /**
     * Multi-threaded matrix-scalar cell-wise operation.
     *
     * <p>Falls back to the single-threaded variant when the input is empty, the operator is not
     * sparse-safe, or the output has fewer than 16384 cells. Otherwise the rows are split into
     * consecutive ranges that are processed by tasks on a thread pool.
     *
     * @param m1 input block
     * @param ret output block
     * @param op scalar operator
     * @param k number of threads requested from the pool and used for partitioning
     * @throws DMLRuntimeException if the output representation does not fit the operator, a task
     *         fails, or the calling thread is interrupted
     */
    public static void bincellOp(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int k) {
        if (op.sparseSafe && m1.isInSparseFormat() != ret.isInSparseFormat() || !op.sparseSafe && ret.isInSparseFormat()) {
            throw new DMLRuntimeException("Wrong output representation for safe=" + op.sparseSafe + ": " + m1.isInSparseFormat() + ", " + ret.isInSparseFormat());
        }
        if (m1.isEmpty() || !op.sparseSafe || ret.getLength() < 16384L) {
            LibMatrixBincell.bincellOp(m1, ret, op);
            return;
        }
        ret.allocateBlock();
        try {
            ExecutorService pool = CommonThreadPool.get(k);
            try {
                ArrayList<BincellScalarTask> tasks = new ArrayList<BincellScalarTask>();
                ArrayList<Integer> blklens = UtilFunctions.getBalancedBlockSizesDefault(ret.rlen, k, false);
                int lb = 0;
                for (int i = 0; i < blklens.size(); ++i) {
                    int len = blklens.get(i);
                    tasks.add(new BincellScalarTask(m1, ret, op, lb, lb + len));
                    lb += len;
                }
                // each task returns the number of non-zeros of its row range
                List<Future<Long>> taskret = pool.invokeAll(tasks);
                ret.nonZeros = 0L;
                for (Future<Long> task : taskret) {
                    ret.nonZeros += task.get();
                }
            }
            finally {
                // release the pool even if a task failed or the wait was interrupted
                pool.shutdown();
            }
        }
        catch (InterruptedException | ExecutionException ex) {
            throw new DMLRuntimeException(ex);
        }
        if (ret.isEmptyBlock(false)) {
            ret.examSparsity();
        }
    }

    /**
     * Single-threaded matrix-matrix (or matrix-vector) cell-wise operation over all rows of
     * {@code m1}.
     *
     * @param m1 left input
     * @param m2 right input
     * @param ret output block; its non-zero count is set from the kernel result
     * @param op binary operator
     */
    public static void bincellOp(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final BinaryAccessType atype = LibMatrixBincell.getBinaryAccessType(m1, m2);

        // pre-allocate the output only for two non-empty inputs of equal dimensions
        if (atype == BinaryAccessType.MATRIX_MATRIX && !m1.isEmpty() && !m2.isEmpty()) {
            ret.allocateBlock();
        }

        final long nnz;
        if (op.sparseSafe || LibMatrixBincell.isSparseSafeDivide(op, m2)) {
            nnz = LibMatrixBincell.safeBinary(m1, m2, ret, op, atype, 0, m1.rlen);
        } else {
            nnz = LibMatrixBincell.unsafeBinary(m1, m2, ret, op, 0, m1.rlen);
        }
        ret.setNonZeros(nnz);

        if (ret.isEmptyBlock(false)) {
            ret.examSparsity();
        }
    }

    /**
     * Multi-threaded matrix-matrix (or matrix-vector) cell-wise operation.
     *
     * <p>Delegates to the single-threaded variant when either input is empty, the output has fewer
     * than 16384 cells, or the operator is sparse-safe (including a sparse-safe divide) while the
     * access type is neither matrix-matrix nor a matrix-vector case with all blocks dense. Otherwise
     * the rows are split into consecutive ranges that are processed by tasks on a thread pool.
     *
     * @param m1 left input
     * @param m2 right input
     * @param ret output block
     * @param op binary operator
     * @param k number of threads requested from the pool and used for partitioning
     * @throws DMLRuntimeException if a task fails or the calling thread is interrupted
     */
    public static void bincellOp(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int k) {
        BinaryAccessType atype = LibMatrixBincell.getBinaryAccessType(m1, m2);
        if (m1.isEmpty() || m2.isEmpty() || ret.getLength() < 16384L || (op.sparseSafe || LibMatrixBincell.isSparseSafeDivide(op, m2)) && atype != BinaryAccessType.MATRIX_MATRIX && (!atype.isMatrixVector() || !LibMatrixBincell.isAllDense(m1, m2, ret))) {
            LibMatrixBincell.bincellOp(m1, m2, ret, op);
            return;
        }
        ret.allocateBlock();
        try {
            ExecutorService pool = CommonThreadPool.get(k);
            try {
                ArrayList<BincellTask> tasks = new ArrayList<BincellTask>();
                ArrayList<Integer> blklens = UtilFunctions.getBalancedBlockSizesDefault(ret.rlen, k, false);
                int lb = 0;
                for (int i = 0; i < blklens.size(); ++i) {
                    int len = blklens.get(i);
                    tasks.add(new BincellTask(m1, m2, ret, op, atype, lb, lb + len));
                    lb += len;
                }
                // each task returns the number of non-zeros of its row range
                List<Future<Long>> taskret = pool.invokeAll(tasks);
                ret.nonZeros = 0L;
                for (Future<Long> task : taskret) {
                    ret.nonZeros += task.get();
                }
            }
            finally {
                // release the pool even if a task failed or the wait was interrupted
                pool.shutdown();
            }
        }
        catch (InterruptedException | ExecutionException ex) {
            throw new DMLRuntimeException(ex);
        }
        if (ret.isEmptyBlock(false)) {
            ret.examSparsity();
        }
    }

    /**
     * In-place binary cell-wise operation; forwards all arguments unchanged to
     * {@code bincellOpInPlaceRight}.
     *
     * @param m1ret block passed as the in-place input/output
     * @param m2 second input block
     * @param op binary operator
     * @return the value returned by {@code bincellOpInPlaceRight}
     */
    public static MatrixBlock bincellOpInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        return LibMatrixBincell.bincellOpInPlaceRight(m1ret, m2, op);
    }

    /**
     * In-place binary cell-wise operation with {@code m1ret} as both input and output.
     *
     * @param m1ret in-place input/output block
     * @param m2 second input block
     * @param op binary operator
     * @return {@code m1ret}
     */
    public static MatrixBlock bincellOpInPlaceRight(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final boolean useSafeKernel = op.sparseSafe || LibMatrixBincell.isSparseSafeDivide(op, m2);
        if (!useSafeKernel) {
            LibMatrixBincell.unsafeBinaryInPlace(m1ret, m2, op);
        } else {
            LibMatrixBincell.safeBinaryInPlace(m1ret, m2, op);
        }

        // let an empty result re-examine its representation
        if (m1ret.isEmptyBlock(false)) {
            m1ret.examSparsity();
        }
        return m1ret;
    }

    /**
     * Binary cell-wise operation {@code op(m2, m1ret)} whose result is stored in {@code m1ret}.
     *
     * <p>If {@code m1ret} is in sparse format, its content is moved into a temporary block and the
     * result is materialized through the regular {@code bincellOp}. Otherwise the dense values of
     * {@code m1ret} are overwritten cell by cell.
     *
     * @param m1ret right-hand operand and output block
     * @param m2 left-hand operand
     * @param op binary operator
     * @return {@code m1ret}
     * @throws NotImplementedException if {@code m2} is sparse and the operator is not sparse-safe
     */
    public static MatrixBlock bincellOpInPlaceLeft(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final int rows = m1ret.getNumRows();
        final int cols = m1ret.getNumColumns();

        if (m1ret.isInSparseFormat()) {
            LOG.warn("Inefficient bincell op in place left, because output is materialized in new matrix");
            MatrixBlock previous = new MatrixBlock(rows, cols, true);
            previous.copyShallow(m1ret);
            m1ret.cleanupBlock(true, true);
            LibMatrixBincell.bincellOp(m2, previous, m1ret, op);
            return m1ret;
        }

        final double[] out = m1ret.getDenseBlockValues();
        final ValueFunction fn = op.fn;

        if (!m2.isInSparseFormat()) {
            // dense left operand: walk both value arrays in lock step
            final double[] left = m2.getDenseBlockValues();
            final int cells = rows * cols;
            for (int idx = 0; idx < cells; idx++) {
                out[idx] = fn.execute(left[idx], out[idx]);
            }
            if (m1ret.isEmptyBlock(false)) {
                m1ret.examSparsity();
            }
            return m1ret;
        }

        if (!op.sparseSafe) {
            throw new NotImplementedException("Not implemented left bincell in place unsafe operations");
        }

        // sparse left operand with a sparse-safe operator: only touch the stored entries
        final SparseBlock sb = m2.getSparseBlock();
        for (int r = 0; r < rows; r++) {
            if (sb.isEmpty(r)) {
                continue;
            }
            final int start = sb.pos(r);
            final int end = sb.size(r) + start;
            final int[] colIx = sb.indexes(r);
            final double[] vals = sb.values(r);
            final int rowOffset = r * cols;
            for (int p = start; p < end; p++) {
                final int target = rowOffset + colIx[p];
                out[target] = fn.execute(vals[p], out[target]);
            }
        }
        return m1ret;
    }

    /**
     * Classifies a pair of blocks by their dimensions.
     *
     * @param m1 left block
     * @param m2 right block
     * @return {@code MATRIX_MATRIX} for equal dimensions; {@code MATRIX_COL_VECTOR} if the left block
     *         has more than one column and the right block exactly one; {@code MATRIX_ROW_VECTOR} if
     *         the left block has more than one row and column and the right block exactly one row;
     *         {@code OUTER_VECTOR_VECTOR} for a single-column left and single-row right block;
     *         {@code INVALID} otherwise
     */
    public static BinaryAccessType getBinaryAccessType(MatrixBlock m1, MatrixBlock m2) {
        final int rows1 = m1.rlen;
        final int cols1 = m1.clen;
        final int rows2 = m2.rlen;
        final int cols2 = m2.clen;

        final boolean sameDims = rows1 == rows2 && cols1 == cols2;
        if (sameDims) {
            return BinaryAccessType.MATRIX_MATRIX;
        }
        final boolean leftHasManyCols = cols1 > 1;
        if (leftHasManyCols && cols2 == 1) {
            return BinaryAccessType.MATRIX_COL_VECTOR;
        }
        if (rows1 > 1 && leftHasManyCols && rows2 == 1) {
            return BinaryAccessType.MATRIX_ROW_VECTOR;
        }
        return (cols1 == 1 && rows2 == 1)
            ? BinaryAccessType.OUTER_VECTOR_VECTOR
            : BinaryAccessType.INVALID;
    }

    /**
     * Classifies a pair of blocks by their dimensions, additionally distinguishing the cases where
     * the left block has fewer columns or rows than the right one.
     *
     * @param m1 left block
     * @param m2 right block
     * @return the access type derived from comparing row and column counts, or {@code INVALID}
     */
    public static BinaryAccessType getBinaryAccessTypeExtended(MatrixBlock m1, MatrixBlock m2) {
        final int rows1 = m1.rlen;
        final int cols1 = m1.clen;
        final int rows2 = m2.rlen;
        final int cols2 = m2.clen;

        if (rows1 == rows2) {
            // equal row counts: decide on the column counts alone
            if (cols1 == cols2) {
                return BinaryAccessType.MATRIX_MATRIX;
            }
            return cols1 < cols2
                ? BinaryAccessType.COL_VECTOR_MATRIX
                : BinaryAccessType.MATRIX_COL_VECTOR;
        }
        if (cols1 == cols2) {
            // equal column counts but different row counts
            return rows1 < rows2
                ? BinaryAccessType.ROW_VECTOR_MATRIX
                : BinaryAccessType.MATRIX_ROW_VECTOR;
        }
        if (cols1 == 1 && rows2 == 1) {
            return BinaryAccessType.OUTER_VECTOR_VECTOR;
        }
        return BinaryAccessType.INVALID;
    }

    /**
     * Verifies that the dimensions of two blocks are compatible for a binary cell-wise operation.
     *
     * <p>Accepted combinations: equal dimensions; equal rows with a multi-column left and
     * single-column right block; equal columns with a multi-row left and single-row right block;
     * single-column left with single-row right block.
     *
     * @param m1 left block
     * @param m2 right block
     * @throws DMLRuntimeException if none of the accepted combinations applies
     */
    public static void isValidDimensionsBinary(MatrixBlock m1, MatrixBlock m2) {
        final int rlen1 = m1.rlen;
        final int clen1 = m1.clen;
        final int rlen2 = m2.rlen;
        final int clen2 = m2.clen;

        final boolean sameRows = rlen1 == rlen2;
        final boolean sameCols = clen1 == clen2;

        final boolean matrixMatrix = sameRows && sameCols;
        final boolean matrixColVector = sameRows && clen1 > 1 && clen2 == 1;
        final boolean matrixRowVector = sameCols && rlen1 > 1 && rlen2 == 1;
        final boolean outerVectors = clen1 == 1 && rlen2 == 1;

        if (matrixMatrix || matrixColVector || matrixRowVector || outerVectors) {
            return;
        }
        throw new DMLRuntimeException("Block sizes are not matched for binary cell operations: " + rlen1 + "x" + clen1 + " vs " + rlen2 + "x" + clen2);
    }

    /**
     * Verifies that the dimensions of two blocks are compatible for a binary cell-wise operation,
     * also accepting a vector on the left side (single column with equal rows, or single row with
     * equal columns).
     *
     * @param m1 left block
     * @param m2 right block
     * @throws RuntimeException if the dimensions match none of the accepted combinations
     */
    public static void isValidDimensionsBinaryExtended(MatrixBlock m1, MatrixBlock m2) {
        final int rlen1 = m1.rlen;
        final int clen1 = m1.clen;
        final int rlen2 = m2.rlen;
        final int clen2 = m2.clen;

        final boolean sameRows = rlen1 == rlen2;
        final boolean sameCols = clen1 == clen2;

        final boolean matrixMatrix = sameRows && sameCols;
        final boolean matrixColVector = sameRows && clen1 > 1 && clen2 == 1;
        final boolean colVectorMatrix = sameRows && clen1 == 1 && clen2 > 1;
        final boolean matrixRowVector = sameCols && rlen1 > 1 && rlen2 == 1;
        final boolean rowVectorMatrix = sameCols && rlen1 == 1 && rlen2 > 1;
        final boolean outerVectors = clen1 == 1 && rlen2 == 1;

        final boolean valid = matrixMatrix || matrixColVector || colVectorMatrix
            || matrixRowVector || rowVectorMatrix || outerVectors;
        if (valid) {
            return;
        }
        throw new RuntimeException("Block sizes are not matched for binary cell operations: " + rlen1 + "x" + clen1 + " vs " + rlen2 + "x" + clen2);
    }

    /**
     * Tests whether a division can be treated like a sparse-safe operation: the operator function is
     * a {@code Divide} and the right-hand side reports as many non-zeros as it has cells.
     *
     * @param op binary operator
     * @param rhs right-hand side block
     * @return {@code true} if both conditions hold
     */
    public static boolean isSparseSafeDivide(BinaryOperator op, MatrixBlock rhs) {
        if (!(op.fn instanceof Divide)) {
            return false;
        }
        final long cells = (long)rhs.getNumRows() * (long)rhs.getNumColumns();
        return rhs.getNonZeros() == cells;
    }

    /**
     * Tests whether none of the given blocks has its {@code sparse} flag set.
     *
     * @param mb blocks to inspect
     * @return {@code true} if every block is flagged dense (also for an empty argument list)
     */
    public static boolean isAllDense(MatrixBlock ... mb) {
        for (MatrixBlock block : mb) {
            if (block.sparse) {
                return false;
            }
        }
        return true;
    }

    /**
     * Unary operation for operators that are not flagged sparse-safe.
     *
     * <p>The operator is first evaluated on 0. If that result is non-zero, the output is reset to
     * that value before the stored cells of the input are processed by
     * {@code sparseUnaryOperations}.
     *
     * @param m1 input block
     * @param ret output block
     * @param op unary operator
     */
    private static void denseUnaryOperations(MatrixBlock m1, MatrixBlock ret, UnaryOperator op) {
        final double resultForZero = op.fn.execute(0.0);
        final boolean zeroMapsToNonZero = resultForZero != 0.0;
        final int rows = m1.rlen;
        final int cols = m1.clen;

        if (m1.isEmptyBlock(false)) {
            // empty input: the whole output is the image of zero
            if (zeroMapsToNonZero) {
                ret.reset(rows, cols, resultForZero);
            }
            return;
        }

        if (m1.sparse && zeroMapsToNonZero) {
            // pre-fill the cells that the sparse input does not store
            ret.reset(rows, cols, resultForZero);
            ret.nonZeros = (long)rows * (long)cols;
        }
        LibMatrixBincell.sparseUnaryOperations(m1, ret, op);
    }

    /**
     * Applies a unary operator to the stored cells of {@code m1} and writes the results to
     * {@code ret}, updating {@code ret.nonZeros}. Returns immediately for an empty input.
     *
     * <p>Three cases are handled: dense input (dense output), sparse input with sparse output, and
     * sparse input with dense output.
     *
     * @param m1 input block
     * @param ret output block
     * @param op unary operator
     */
    private static void sparseUnaryOperations(MatrixBlock m1, MatrixBlock ret, UnaryOperator op) {
        if (m1.isEmptyBlock(false)) {
            return;
        }
        final int rows = m1.rlen;
        final int cols = m1.clen;

        if (!m1.sparse) {
            // dense input: process the underlying value arrays block by block
            if (m1 != ret) {
                ret.allocateDenseBlock(false);
            }
            final DenseBlock in = m1.getDenseBlock();
            final DenseBlock out = ret.getDenseBlock();
            long count = 0L;
            for (int blk = 0; blk < in.numBlocks(); blk++) {
                final double[] src = in.valuesAt(blk);
                final double[] dst = out.valuesAt(blk);
                final int size = in.size(blk);
                for (int idx = 0; idx < size; idx++) {
                    dst[idx] = op.fn.execute(src[idx]);
                    if (dst[idx] != 0.0) {
                        count++;
                    }
                }
            }
            ret.nonZeros = count;
        } else if (ret.sparse) {
            // sparse input, sparse output: append results row by row
            ret.allocateSparseRowsBlock();
            final SparseBlock in = m1.sparseBlock;
            final SparseBlock out = ret.sparseBlock;
            long count = 0L;
            for (int r = 0; r < rows; r++) {
                if (in.isEmpty(r)) {
                    continue;
                }
                final int start = in.pos(r);
                final int size = in.size(r);
                final int[] colIx = in.indexes(r);
                final double[] vals = in.values(r);
                out.allocate(r, size);
                final int end = start + size;
                for (int p = start; p < end; p++) {
                    final double res = op.fn.execute(vals[p]);
                    out.append(r, colIx[p], res);
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
            ret.nonZeros = count;
        } else {
            // sparse input, dense output: overwrite only the stored positions
            ret.allocateDenseBlock(false);
            final SparseBlock in = m1.sparseBlock;
            final DenseBlock out = ret.denseBlock;
            // a positive non-zero count at this point means the output was pre-filled;
            // start from the cells the sparse input does not store
            long count = ret.nonZeros > 0L ? (long)rows * (long)cols - in.size() : 0L;
            for (int r = 0; r < rows; r++) {
                if (in.isEmpty(r)) {
                    continue;
                }
                final int start = in.pos(r);
                final int size = in.size(r);
                final int[] colIx = in.indexes(r);
                final double[] vals = in.values(r);
                final double[] dst = out.values(r);
                final int dstOffset = out.pos(r);
                final int end = start + size;
                for (int p = start; p < end; p++) {
                    final double res = op.fn.execute(vals[p]);
                    dst[dstOffset + colIx[p]] = res;
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
            ret.nonZeros = count;
        }
    }

    /**
     * Dispatches a sparse-safe binary operation to a kernel chosen from the access type, the operator
     * function and the sparse/dense flags of inputs and output.
     *
     * @param m1 left input
     * @param m2 right input
     * @param ret output block
     * @param op binary operator
     * @param atype access type of the two inputs
     * @param rl lower row bound, forwarded to the kernels that accept a row range
     * @param ru upper row bound, forwarded to the kernels that accept a row range
     * @return the value returned by the selected kernel, or {@code ret.getNonZeros()} for the branches
     *         that do not return a count themselves; 0 on early exit
     */
    private static long safeBinary(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, BinaryAccessType atype, int rl, int ru) {
        // multiply and sparse-safe divide produce nothing if either input is empty
        boolean skipEmpty = op.fn instanceof Multiply || LibMatrixBincell.isSparseSafeDivide(op, m2);
        // operators for which the left input is copied when the right input is empty
        boolean copyLeftRightEmpty = op.fn instanceof Plus || op.fn instanceof Minus || op.fn instanceof PlusMultiply || op.fn instanceof MinusMultiply;
        // operator for which the right input is copied when the left input is empty
        boolean copyRightLeftEmpty = op.fn instanceof Plus;
        if (m1.isEmptyBlock(false) && m2.isEmptyBlock(false) || skipEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false))) {
            return 0L;
        }
        if (atype == BinaryAccessType.MATRIX_COL_VECTOR || atype == BinaryAccessType.MATRIX_ROW_VECTOR) {
            // matrix-vector kernels
            if (!(m1.sparse || m2.sparse || ret.sparse)) {
                return LibMatrixBincell.safeBinaryMVDense(m1, m2, ret, op, rl, ru);
            }
            if (m1.sparse && !m2.sparse && !ret.sparse && atype == BinaryAccessType.MATRIX_ROW_VECTOR) {
                LibMatrixBincell.safeBinaryMVSparseDenseRow(m1, m2, ret, op);
            } else if (m1.sparse) {
                LibMatrixBincell.safeBinaryMVSparse(m1, m2, ret, op);
            } else if (!m1.sparse && !m2.sparse && ret.sparse && op.fn instanceof Multiply && atype == BinaryAccessType.MATRIX_COL_VECTOR && (long)m1.rlen * (long)m2.clen < Integer.MAX_VALUE) {
                LibMatrixBincell.safeBinaryMVDenseSparseMult(m1, m2, ret, op);
            } else {
                LibMatrixBincell.safeBinaryMVGeneric(m1, m2, ret, op);
            }
        } else if (atype == BinaryAccessType.OUTER_VECTOR_VECTOR) {
            LibMatrixBincell.safeBinaryVVGeneric(m1, m2, ret, op);
        } else if (copyLeftRightEmpty && m2.isEmpty()) {
            ret.copyShallow(m1);
        } else if (copyRightLeftEmpty && m1.isEmpty()) {
            ret.copyShallow(m2);
        } else {
            // matrix-matrix kernels; the order of the checks selects the kernel
            if (m1.sparse && m2.sparse) {
                return LibMatrixBincell.safeBinaryMMSparseSparse(m1, m2, ret, op, rl, ru);
            }
            if (!ret.sparse && (m1.sparse || m2.sparse) && (op.fn instanceof Plus || op.fn instanceof Minus || op.fn instanceof PlusMultiply || op.fn instanceof MinusMultiply || op.fn instanceof Multiply && !m2.sparse)) {
                return LibMatrixBincell.safeBinaryMMSparseDenseDense(m1, m2, ret, op, rl, ru);
            }
            if (!(ret.sparse || m1.sparse || m2.sparse || m1.denseBlock == null || m2.denseBlock == null)) {
                return LibMatrixBincell.safeBinaryMMDenseDenseDense(m1, m2, ret, op, rl, ru);
            }
            if (skipEmpty && (m1.sparse || m2.sparse)) {
                return LibMatrixBincell.safeBinaryMMSparseDenseSkip(m1, m2, ret, op, rl, ru);
            }
            return LibMatrixBincell.safeBinaryMMGeneric(m1, m2, ret, op, rl, ru);
        }
        // reached by the branches above that call a void kernel or copy an input
        return ret.getNonZeros();
    }

    /**
     * Dense matrix-vector operation over rows {@code [rl, ru)}; selects the column-vector or
     * row-vector kernel from the access type of the inputs.
     *
     * @param m1 matrix input
     * @param m2 vector input
     * @param ret output block, allocated dense here if not yet allocated
     * @param op binary operator
     * @param rl lower row bound (inclusive)
     * @param ru upper row bound (exclusive)
     * @return the count returned by the selected kernel, or 0 for a multiply with an empty input
     */
    private static long safeBinaryMVDense(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        // a multiply with an empty operand yields an empty result
        if (op.fn instanceof Multiply && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false))) {
            return 0L;
        }
        if (!ret.isAllocated()) {
            ret.allocateDenseBlock();
        }
        final boolean colVector = LibMatrixBincell.getBinaryAccessType(m1, m2) == BinaryAccessType.MATRIX_COL_VECTOR;
        return colVector
            ? LibMatrixBincell.safeBinaryMVDenseColVector(m1, m2, ret, op, rl, ru)
            : LibMatrixBincell.safeBinaryMVDenseRowVector(m1, m2, ret, op, rl, ru);
    }

    /**
     * Dense matrix / column-vector kernel over rows {@code [rl, ru)}: every cell of row {@code i} is
     * combined with the {@code i}-th value of the vector (0 if the vector has no dense values).
     *
     * <p>For a multiply, rows with a vector value of 0 are left untouched in the output and rows with
     * a vector value of 1 are copied from the input.
     *
     * @param m1 matrix input
     * @param m2 column-vector input
     * @param ret output block
     * @param op binary operator
     * @param rl lower row bound (inclusive)
     * @param ru upper row bound (exclusive)
     * @return number of non-zero results written for the row range
     * @throws RuntimeException if the first row of the matrix input has no value array
     */
    private static long safeBinaryMVDenseColVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final boolean isMult = op.fn instanceof Multiply;
        final int ncol = m1.clen;
        final DenseBlock in = m1.getDenseBlock();
        if (in.values(0) == null) {
            throw new RuntimeException("Invalid input with empty input");
        }
        final DenseBlock out = ret.getDenseBlock();
        final double[] vec = m2.getDenseBlockValues();

        // multiply with an all-zero vector: nothing to write
        if (vec == null && isMult) {
            return 0L;
        }

        long count = 0L;
        for (int r = rl; r < ru; r++) {
            final double[] src = in.values(r);
            final double[] dst = out.values(r);
            final int off = in.pos(r);
            final double rhs = (vec == null) ? 0.0 : vec[r];

            if (isMult) {
                if (rhs == 0.0) {
                    continue;
                }
                if (rhs == 1.0) {
                    // multiplying by one: copy the row and count its non-zeros
                    System.arraycopy(src, off, dst, off, ncol);
                    count += m1.recomputeNonZeros(r, r, 0, ncol - 1);
                    continue;
                }
            }

            for (int j = 0; j < ncol; j++) {
                final double res = op.fn.execute(src[off + j], rhs);
                dst[off + j] = res;
                if (res != 0.0) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Dense matrix / row-vector kernel over rows {@code [rl, ru)}: the cell in column {@code j} is
     * combined with the {@code j}-th value of the vector. Missing dense data on either side is
     * treated as zeros.
     *
     * @param m1 matrix input
     * @param m2 row-vector input
     * @param ret output block
     * @param op binary operator
     * @param rl lower row bound (inclusive)
     * @param ru upper row bound (exclusive)
     * @return number of non-zero results for the row range
     */
    private static long safeBinaryMVDenseRowVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int ncol = m1.clen;
        final DenseBlock in = m1.getDenseBlock();
        final DenseBlock out = ret.getDenseBlock();
        final double[] vec = m2.getDenseBlockValues();
        long count = 0L;

        if (in != null) {
            // regular case: the matrix has dense data
            for (int r = rl; r < ru; r++) {
                final double[] src = in.values(r);
                final double[] dst = out.values(r);
                final int off = in.pos(r);
                for (int j = 0; j < ncol; j++) {
                    final double res = op.fn.execute(src[off + j], vec != null ? vec[j] : 0.0);
                    dst[off + j] = res;
                    if (res != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }

        if (vec == null) {
            // both sides without data: one constant fills the whole range
            final double res = op.fn.execute(0L, 0L);
            out.set(rl, ru, 0, ncol, res);
            if (res != 0.0) {
                count += (long)(ru - rl) * (long)ncol;
            }
            return count;
        }

        // matrix without data: compute one row and replicate it
        // NOTE(review): values are written at index j of out.values(rl) without adding
        // out.pos(rl) - confirm this is intended for rl > 0
        final double[] first = out.values(rl);
        for (int j = 0; j < ncol; j++) {
            final double res = op.fn.execute(0.0, vec[j]);
            first[j] = res;
            if (first[j] != 0.0) {
                count += (long)(ru - rl);
            }
        }
        for (int r = rl + 1; r < ru; r++) {
            out.set(r, first);
        }
        return count;
    }

    /**
     * Sparse matrix / dense row-vector kernel with dense output, processing all rows of
     * {@code m1}.
     *
     * <p>Unless the operator is a multiply, each output row starts from the operator applied to
     * (0, vector value); the stored cells of the sparse row then overwrite their positions.
     *
     * @param m1 sparse matrix input
     * @param m2 dense row-vector input
     * @param ret output block; its dense block is allocated here and {@code nonZeros} is set
     * @param op binary operator
     */
    private static void safeBinaryMVSparseDenseRow(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final boolean skipEmpty = op.fn instanceof Multiply;
        final int nrow = m1.rlen;
        final int ncol = m1.clen;
        final SparseBlock in = m1.sparseBlock;
        final double[] vec = m2.getDenseBlockValues();
        final DenseBlock out = ret.allocateDenseBlock().getDenseBlock();

        if (skipEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false))) {
            return;
        }

        // template row: results for cells the sparse row does not store (all zero for multiply)
        final double[] template = new double[ncol];
        if (!skipEmpty) {
            for (int j = 0; j < ncol; j++) {
                template[j] = op.fn.execute(0.0, vec[j]);
            }
        }

        long count = 0L;
        for (int r = 0; r < nrow; r++) {
            final boolean rowEmpty = in == null || in.isEmpty(r);
            if (skipEmpty && rowEmpty) {
                continue;
            }
            final double[] dst = out.values(r);
            final int dstOff = out.pos(r);
            System.arraycopy(template, 0, dst, dstOff, ncol);
            if (!rowEmpty) {
                final int start = in.pos(r);
                final int size = in.size(r);
                final int[] colIx = in.indexes(r);
                final double[] vals = in.values(r);
                final int end = start + size;
                for (int p = start; p < end; p++) {
                    dst[dstOff + colIx[p]] = op.fn.execute(vals[p], vec[colIx[p]]);
                }
            }
            count += (long)UtilFunctions.computeNnz(dst, dstOff, ncol);
        }
        ret.nonZeros = count;
    }

    /**
     * Matrix-vector operation for a sparse matrix input; forwards to the column-vector or row-vector
     * kernel according to the access type of the inputs.
     *
     * @param m1 sparse matrix input
     * @param m2 vector input
     * @param ret output block; its sparse rows are allocated here if it is flagged sparse
     * @param op binary operator
     */
    private static void safeBinaryMVSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final boolean skipEmpty = op.fn instanceof Multiply || LibMatrixBincell.isSparseSafeDivide(op, m2);
        final BinaryAccessType atype = LibMatrixBincell.getBinaryAccessType(m1, m2);

        // multiply / sparse-safe divide with an empty operand: nothing to compute
        final boolean nothingToDo = skipEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false));
        if (nothingToDo) {
            return;
        }
        if (ret.sparse) {
            ret.allocateSparseRowsBlock();
        }

        if (atype == BinaryAccessType.MATRIX_COL_VECTOR) {
            LibMatrixBincell.safeBinaryMVSparseColVector(m1, m2, ret, op);
            return;
        }
        if (atype == BinaryAccessType.MATRIX_ROW_VECTOR) {
            LibMatrixBincell.safeBinaryMVSparseRowVector(m1, m2, ret, op);
        }
    }

    /**
     * Sparse matrix / column-vector kernel: row {@code i} of the matrix is combined with the
     * {@code i}-th vector value and the results are appended to {@code ret}.
     *
     * <p>Gaps between stored cells are filled through {@code fillZeroValues}, which is a no-op for
     * multiply and sparse-safe divide.
     *
     * @param m1 sparse matrix input
     * @param m2 column-vector input
     * @param ret output block
     * @param op binary operator
     */
    private static void safeBinaryMVSparseColVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final boolean isMultiply = op.fn instanceof Multiply;
        final boolean skipEmpty = isMultiply || LibMatrixBincell.isSparseSafeDivide(op, m2);
        final int nrow = m1.rlen;
        final int ncol = m1.clen;
        final SparseBlock in = m1.sparseBlock;

        for (int r = 0; r < nrow; r++) {
            final double rhs = m2.quickGetValue(r, 0);
            final boolean rowEmpty = in == null || in.isEmpty(r);
            final boolean rhsZero = rhs == 0.0;

            // skip rows that cannot produce output: with skipEmpty one zero side suffices,
            // otherwise both sides have to be zero
            if (skipEmpty ? (rowEmpty || rhsZero) : (rowEmpty && rhsZero)) {
                continue;
            }

            if (isMultiply && rhs == 1.0) {
                // multiplying by one: reuse the input row
                if (!rowEmpty) {
                    ret.appendRow(r, in.get(r));
                }
                continue;
            }

            int nextCol = 0; // first column not yet covered
            if (!rowEmpty) {
                final int start = in.pos(r);
                final int size = in.size(r);
                final int[] colIx = in.indexes(r);
                final double[] vals = in.values(r);
                final int end = start + size;
                for (int p = start; p < end; p++) {
                    LibMatrixBincell.fillZeroValues(op, rhs, ret, skipEmpty, r, nextCol, colIx[p]);
                    final double res = op.fn.execute(vals[p], rhs);
                    ret.appendValue(r, colIx[p], res);
                    nextCol = colIx[p] + 1;
                }
            }
            LibMatrixBincell.fillZeroValues(op, rhs, ret, skipEmpty, r, nextCol, ncol);
        }
    }

    /**
     * Sparse matrix / row-vector kernel: the cell in column {@code j} is combined with the
     * {@code j}-th vector value and the results are appended to {@code ret}.
     *
     * <p>Gaps between stored cells are filled through {@code fillZeroValues}, which is a no-op for
     * multiply and sparse-safe divide.
     *
     * @param m1 sparse matrix input
     * @param m2 row-vector input
     * @param ret output block
     * @param op binary operator
     */
    private static void safeBinaryMVSparseRowVector(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final boolean skipEmpty = op.fn instanceof Multiply || LibMatrixBincell.isSparseSafeDivide(op, m2);
        final int nrow = m1.rlen;
        final int ncol = m1.clen;
        final SparseBlock in = m1.sparseBlock;

        for (int r = 0; r < nrow; r++) {
            final boolean rowEmpty = in == null || in.isEmpty(r);
            if (skipEmpty) {
                if (rowEmpty) {
                    continue;
                }
                if (ret.sparse) {
                    // output row holds at most as many entries as the input row
                    ret.sparseBlock.allocate(r, in.size(r));
                }
            }

            int nextCol = 0; // first column not yet covered
            if (!rowEmpty) {
                final int start = in.pos(r);
                final int size = in.size(r);
                final int[] colIx = in.indexes(r);
                final double[] vals = in.values(r);
                final int end = start + size;
                for (int p = start; p < end; p++) {
                    LibMatrixBincell.fillZeroValues(op, m2, ret, skipEmpty, r, nextCol, colIx[p]);
                    final double rhs = m2.quickGetValue(0, colIx[p]);
                    final double res = op.fn.execute(vals[p], rhs);
                    ret.appendValue(r, colIx[p], res);
                    nextCol = colIx[p] + 1;
                }
            }
            LibMatrixBincell.fillZeroValues(op, m2, ret, skipEmpty, r, nextCol, ncol);
        }
    }

    /**
     * Appends {@code op(0, v2)} to columns {@code [cpos, len)} of row {@code rpos} in {@code ret},
     * unless {@code skipEmpty} is set or that value is 0.
     *
     * @param op binary operator
     * @param v2 right-hand scalar value
     * @param ret output block
     * @param skipEmpty if {@code true}, nothing is appended
     * @param rpos output row
     * @param cpos first column to fill (inclusive)
     * @param len end column (exclusive)
     */
    private static final void fillZeroValues(BinaryOperator op, double v2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        if (skipEmpty) {
            return;
        }
        final double fill = op.fn.execute(0.0, v2);
        if (fill == 0.0) {
            return;
        }
        for (int col = cpos; col < len; col++) {
            ret.appendValue(rpos, col, fill);
        }
    }

    /**
     * Fills columns {@code [cpos, len)} of row {@code rpos} in {@code ret} for cells whose left
     * operand is 0, choosing the helper by the state of {@code m2} (empty, sparse, or dense).
     *
     * @param op binary operator
     * @param m2 right-hand block
     * @param ret output block
     * @param skipEmpty if {@code true}, nothing is done
     * @param rpos output row
     * @param cpos first column to fill (inclusive)
     * @param len end column (exclusive)
     */
    private static void fillZeroValues(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        if (skipEmpty) {
            return;
        }
        if (m2.isEmpty()) {
            LibMatrixBincell.fillZeroValuesEmpty(op, m2, ret, skipEmpty, rpos, cpos, len);
            return;
        }
        if (m2.isInSparseFormat()) {
            LibMatrixBincell.fillZeroValuesSparse(op, m2, ret, skipEmpty, rpos, cpos, len);
            return;
        }
        LibMatrixBincell.fillZeroValuesDense(op, m2, ret, skipEmpty, rpos, cpos, len);
    }

    /**
     * Fill helper for an empty right-hand block: appends {@code op(0, 0)} to columns
     * {@code [cpos, len)} of row {@code rpos} if that value is not 0.
     *
     * @param op binary operator
     * @param m2 right-hand block (not read here)
     * @param ret output block
     * @param skipEmpty not read here
     * @param rpos output row
     * @param cpos first column to fill (inclusive)
     * @param len end column (exclusive)
     */
    private static void fillZeroValuesEmpty(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double fill = op.fn.execute(0.0, 0.0);
        if (fill == 0.0) {
            return;
        }
        for (int col = cpos; col < len; col++) {
            ret.appendValue(rpos, col, fill);
        }
    }

    /**
     * Fill helper for a dense right-hand block: appends {@code op(0, vals[k])} for every column
     * {@code k} in {@code [cpos, len)} of row {@code rpos}, where {@code vals} is the value array of
     * row 0 of {@code m2}.
     *
     * <p>If the output is sparse with an MCSR block, the values are appended directly to the
     * allocated sparse row; otherwise {@code ret.appendValue} is used.
     *
     * @param op binary operator
     * @param m2 dense right-hand block
     * @param ret output block
     * @param skipEmpty not read here
     * @param rpos output row
     * @param cpos first column to fill (inclusive)
     * @param len end column (exclusive)
     */
    private static void fillZeroValuesDense(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double[] rhs = m2.getDenseBlock().values(0);
        final SparseBlock target = ret.getSparseBlock();
        final boolean appendToRow = ret.isInSparseFormat() && target instanceof SparseBlockMCSR;

        if (!appendToRow) {
            for (int col = cpos; col < len; col++) {
                ret.appendValue(rpos, col, op.fn.execute(0.0, rhs[col]));
            }
            return;
        }

        final SparseBlockMCSR mcsr = (SparseBlockMCSR)target;
        mcsr.allocate(rpos, cpos, len);
        final SparseRow row = mcsr.get(rpos);
        for (int col = cpos; col < len; col++) {
            row.append(col, op.fn.execute(0.0, rhs[col]));
        }
    }

    /**
     * Fill helper for a sparse right-hand block: for row {@code rpos} of {@code ret}, appends the
     * results for columns in {@code [cpos, len)} whose left operand is 0.
     *
     * <p>Columns stored in row 0 of {@code m2} receive {@code op(0, value)}; all other columns
     * receive {@code op(0, 0)}, but only if that value is not 0.
     *
     * @param op binary operator
     * @param m2 sparse right-hand block; only its row 0 is read
     * @param ret output block
     * @param skipEmpty not read here
     * @param rpos output row
     * @param cpos first column to fill (inclusive)
     * @param len end column (exclusive)
     */
    private static void fillZeroValuesSparse(BinaryOperator op, MatrixBlock m2, MatrixBlock ret, boolean skipEmpty, int rpos, int cpos, int len) {
        final double zero = op.fn.execute(0.0, 0.0);
        final boolean zeroIsZero = zero == 0.0;
        final SparseBlock sb = m2.getSparseBlock();

        if (sb.isEmpty(0)) {
            // no stored values on the right: every column gets op(0, 0)
            if (!zeroIsZero) {
                while (cpos < len) {
                    ret.appendValue(rpos, cpos++, zero);
                }
            }
            return;
        }

        // the start position must be known before the end position can be derived from it
        int apos = sb.pos(0);
        final int alen = sb.size(0) + apos;
        final int[] aix = sb.indexes(0);
        final double[] vals = sb.values(0);

        // skip stored entries that lie before the requested column range
        while (apos < alen && aix[apos] < len && cpos > aix[apos]) {
            ++apos;
        }

        // stored entries inside the range, with the gaps in front of them
        for (; apos < alen && aix[apos] < len; ++apos) {
            if (!zeroIsZero) {
                while (cpos < len && cpos < aix[apos]) {
                    ret.appendValue(rpos, cpos++, zero);
                }
            }
            ret.appendValue(rpos, aix[apos], op.fn.execute(0.0, vals[apos]));
            // continue after the column just written so it is not filled a second time
            cpos = aix[apos] + 1;
        }

        // tail after the last stored entry
        if (!zeroIsZero) {
            while (cpos < len) {
                ret.appendValue(rpos, cpos++, zero);
            }
        }
    }

    /**
     * Multiplies each row of {@code m1} by the matching entry of the column vector
     * {@code m2}, reading both through their dense value arrays, and stores the product
     * on {@code ret} as a freshly built CSR sparse block. Returns without touching
     * {@code ret} when either input is empty or the access type is not
     * matrix-column-vector.
     */
    private static void safeBinaryMVDenseSparseMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        if (m1.isEmptyBlock(false) || m2.isEmptyBlock(false)) {
            return;
        }
        final int numRows = m1.rlen;
        final int numCols = m1.clen;
        final BinaryAccessType access = LibMatrixBincell.getBinaryAccessType(m1, m2);
        final double[] matrix = m1.getDenseBlockValues();
        final double[] vector = m2.getDenseBlockValues();
        if (access != BinaryAccessType.MATRIX_COL_VECTOR) {
            return;
        }

        // Pass 1: count the output non-zeros so the CSR arrays can be sized exactly.
        int total = 0;
        for (int row = 0, off = 0; row < numRows; ++row, off += numCols) {
            if (vector[row] != 0.0) {
                total += UtilFunctions.countNonZeros(matrix, off, numCols);
            }
        }

        // Pass 2: emit column indexes and scaled values row by row.
        // rowPtr[0] stays at its default of 0.
        final int[] rowPtr = new int[numRows + 1];
        final int[] colIdx = new int[total];
        final double[] data = new double[total];
        int next = 0;
        for (int row = 0, off = 0; row < numRows; ++row, off += numCols) {
            final double scale = vector[row];
            if (scale != 0.0) {
                for (int col = 0; col < numCols; ++col) {
                    final double cell = matrix[off + col];
                    if (cell != 0.0) {
                        colIdx[next] = col;
                        data[next] = cell * scale;
                        ++next;
                    }
                }
            }
            rowPtr[row + 1] = next;
        }
        ret.sparseBlock = new SparseBlockCSR(rowPtr, colIdx, data, total);
        ret.setNonZeros(total);
    }

    /**
     * Generic matrix-vector binary operation via cell-wise get/append. Handles
     * matrix-column-vector and matrix-row-vector access; any other access type leaves
     * {@code ret} untouched apart from the optional sparse row allocation.
     */
    private static void safeBinaryMVGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final boolean isMultiply = op.fn instanceof Multiply;
        final boolean skipEmpty = isMultiply;
        final int numRows = m1.rlen;
        final int numCols = m1.clen;
        final BinaryAccessType access = LibMatrixBincell.getBinaryAccessType(m1, m2);

        // Multiplication with an empty operand yields an empty result.
        if (skipEmpty && (m1.isEmptyBlock(false) || m2.isEmptyBlock(false))) {
            return;
        }
        if (ret.sparse) {
            ret.allocateSparseRowsBlock();
        }

        if (access == BinaryAccessType.MATRIX_COL_VECTOR) {
            for (int row = 0; row < numRows; ++row) {
                final double rhs = m2.quickGetValue(row, 0);
                if (skipEmpty && rhs == 0.0) {
                    continue;
                }
                // Multiplying by 1 is a plain row copy; skip the operator call.
                final boolean copyRow = isMultiply && rhs == 1.0;
                for (int col = 0; col < numCols; ++col) {
                    final double lhs = m1.quickGetValue(row, col);
                    if (copyRow) {
                        ret.appendValue(row, col, lhs);
                    } else {
                        ret.appendValue(row, col, op.fn.execute(lhs, rhs));
                    }
                }
            }
        } else if (access == BinaryAccessType.MATRIX_ROW_VECTOR) {
            if (m2.sparse && ret.sparse && isMultiply) {
                // Only the stored entries of the sparse row vector can produce output.
                final SparseBlock vec = m2.sparseBlock;
                final SparseBlock out = ret.sparseBlock;
                if (vec.isEmpty(0)) {
                    return;
                }
                final int vecLen = vec.size(0);
                final int[] vecIdx = vec.indexes(0);
                final double[] vecVals = vec.values(0);
                for (int row = 0; row < numRows; ++row) {
                    out.allocate(row, vecLen);
                    for (int k = 0; k < vecLen; ++k) {
                        out.append(row, vecIdx[k], m1.quickGetValue(row, vecIdx[k]) * vecVals[k]);
                    }
                }
                ret.setNonZeros(out.size());
            } else {
                for (int row = 0; row < numRows; ++row) {
                    for (int col = 0; col < numCols; ++col) {
                        final double lhs = m1.quickGetValue(row, col);
                        final double rhs = m2.quickGetValue(0, col);
                        ret.appendValue(row, col, op.fn.execute(lhs, rhs));
                    }
                }
            }
        }
    }

    /**
     * Outer vector-vector binary operation: combines every row value of {@code m1}
     * (column 0) with every column value of {@code m2} (row 0). Comparison operators
     * over a sorted right-hand vector with more than 16 columns are routed to
     * {@code performBinOuterOperation}.
     */
    private static void safeBinaryVVGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) {
        final int numRows = m1.rlen;
        final int numCols = m2.clen;
        if (ret.sparse) {
            ret.allocateSparseRowsBlock();
        }
        final boolean sortedCompare = LibMatrixOuterAgg.isCompareOperator(op) && m2.getNumColumns() > 16 && SortUtils.isSorted(m2);
        if (sortedCompare) {
            LibMatrixBincell.performBinOuterOperation(m1, m2, ret, op);
            return;
        }
        for (int row = 0; row < numRows; ++row) {
            final double lhs = m1.quickGetValue(row, 0);
            for (int col = 0; col < numCols; ++col) {
                final double rhs = m2.quickGetValue(0, col);
                ret.appendValue(row, col, op.fn.execute(lhs, rhs));
            }
        }
    }

    /**
     * Applies {@code op} to rows {@code [rl, ru)} of two sparse-format inputs and appends
     * the results to {@code ret}.
     *
     * @return number of non-zeros counted for the processed rows
     */
    private static long safeBinaryMMSparseSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        if (ret.sparse && !ret.isAllocated()) {
            ret.allocateSparseRowsBlock();
        }
        final SparseBlock left = m1.sparseBlock;
        final SparseBlock right = m2.sparseBlock;
        long count = 0L;

        if (left == null || right == null) {
            if (right != null) {
                // Only the right input has a sparse block.
                for (int row = rl; row < Math.min(ru, right.numRows()); ++row) {
                    if (!right.isEmpty(row)) {
                        LibMatrixBincell.appendRightForSparseBinary(op, right.values(row), right.indexes(row), right.pos(row), right.size(row), 0, row, ret);
                        count += ret.recomputeNonZeros(row, row);
                    }
                }
            } else {
                // Right block is missing; the left block is dereferenced without a null check.
                for (int row = rl; row < ru; ++row) {
                    if (!left.isEmpty(row)) {
                        LibMatrixBincell.appendLeftForSparseBinary(op, left.values(row), left.indexes(row), left.pos(row), left.size(row), 0, row, ret);
                        count += ret.recomputeNonZeros(row, row);
                    }
                }
            }
            return count;
        }

        if (ret.sparse && left.isAligned(right)) {
            // Aligned inputs: walk both value arrays with the left row's offsets.
            final SparseBlock out = ret.sparseBlock;
            for (int row = rl; row < ru; ++row) {
                if (left.isEmpty(row)) {
                    continue;
                }
                final int len = left.size(row);
                final int start = left.pos(row);
                final int[] idx = left.indexes(row);
                final double[] lvals = left.values(row);
                final double[] rvals = right.values(row);
                out.allocate(row, len);
                final int stop = start + len;
                for (int k = start; k < stop; ++k) {
                    out.append(row, idx[k], op.fn.execute(lvals[k], rvals[k]));
                }
                count += (long) out.size(row);
            }
            return count;
        }

        // General case: merge rows, or append whichever side is non-empty.
        for (int row = rl; row < ru; ++row) {
            final boolean hasLeft = !left.isEmpty(row);
            if (hasLeft && !right.isEmpty(row)) {
                LibMatrixBincell.mergeForSparseBinary(op, left.values(row), left.indexes(row), left.pos(row), left.size(row), right.values(row), right.indexes(row), right.pos(row), right.size(row), row, ret);
            } else if (!right.isEmpty(row)) {
                LibMatrixBincell.appendRightForSparseBinary(op, right.values(row), right.indexes(row), right.pos(row), right.size(row), 0, row, ret);
            } else if (hasLeft) {
                LibMatrixBincell.appendLeftForSparseBinary(op, left.values(row), left.indexes(row), left.pos(row), left.size(row), 0, row, ret);
            }
            count += ret.recomputeNonZeros(row, row);
        }
        return count;
    }

    /**
     * Computes {@code op(m1, m2)} for rows {@code [rl, ru)} into a dense output in two
     * phases: first the left input is written into the output rows, then the operator is
     * applied in place with the right input.
     *
     * @param m1 left input (sparse or dense)
     * @param m2 right input (sparse or dense)
     * @param ret output block; its dense block is allocated here if not yet allocated
     * @param op binary operator
     * @param rl first row (inclusive)
     * @param ru last row (exclusive)
     * @return non-zero count derived in phase 2 (stays 0 when neither phase-2 branch applies)
     */
    private static long safeBinaryMMSparseDenseDense(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        int i;
        DenseBlock da;
        if (!ret.isAllocated()) {
            ret.allocateDenseBlock();
        }
        int n = ret.clen;
        DenseBlock dc = ret.getDenseBlock();
        // Phase 1: write the left input into the output rows.
        if (m1.sparse && m1.sparseBlock != null) {
            // Sparse left: scatter the stored values of each non-empty row.
            SparseBlock a = m1.sparseBlock;
            for (int i2 = rl; i2 < ru; ++i2) {
                double[] c = dc.values(i2);
                int cpos = dc.pos(i2);
                if (a.isEmpty(i2)) continue;
                int apos = a.pos(i2);
                int alen = a.size(i2);
                int[] aix = a.indexes(i2);
                double[] avals = a.values(i2);
                for (int k = apos; k < apos + alen; ++k) {
                    c[cpos + aix[k]] = avals[k];
                }
            }
        } else if (!m1.sparse) {
            if (!m1.isEmptyBlock(false)) {
                int rlbix = dc.index(rl);
                int rubix = dc.index(ru - 1);
                da = m1.getDenseBlock();
                if (rlbix == rubix) {
                    // First and last row map to the same block index: one bulk copy.
                    System.arraycopy(da.valuesAt(rlbix), da.pos(rl), dc.valuesAt(rlbix), dc.pos(rl), (ru - rl) * n);
                } else {
                    // Otherwise copy row by row.
                    for (i = rl; i < ru; ++i) {
                        System.arraycopy(da.values(i), da.pos(i), dc.values(i), dc.pos(i), n);
                    }
                }
            } else {
                // Dense but empty left input: set the output dense block to 0.
                dc.set(0.0);
            }
        }
        // Phase 2: apply the operator in place with the right input.
        long lnnz = 0L;
        if (m2.sparse && m2.sparseBlock != null) {
            // Sparse right: only cells with a stored right-hand value are updated.
            SparseBlock a = m2.sparseBlock;
            for (i = rl; i < ru; ++i) {
                double[] c = dc.values(i);
                int cpos = dc.pos(i);
                if (!a.isEmpty(i)) {
                    int apos = a.pos(i);
                    int alen = a.size(i);
                    int[] aix = a.indexes(i);
                    double[] avals = a.values(i);
                    for (int k = apos; k < apos + alen; ++k) {
                        c[cpos + aix[k]] = op.fn.execute(c[cpos + aix[k]], avals[k]);
                    }
                }
                // Non-zeros are recounted per row, including rows without right-hand entries.
                lnnz += ret.recomputeNonZeros(i, i);
            }
        } else if (!m2.sparse) {
            if (!m2.isEmptyBlock(false)) {
                da = m2.getDenseBlock();
                for (i = rl; i < ru; ++i) {
                    int apos;
                    double[] a = da.values(i);
                    double[] c = dc.values(i);
                    // The right input's row offset also indexes the output row;
                    // presumably both dense blocks share the same layout - TODO confirm.
                    for (int j = apos = da.pos(i); j < apos + n; ++j) {
                        c[j] = op.fn.execute(c[j], a[j]);
                        lnnz += c[j] != 0.0 ? 1L : 0L;
                    }
                }
            } else if (op.fn instanceof Multiply) {
                // Multiplication with an empty right input: set the output to 0.
                ret.denseBlock.set(0.0);
            } else {
                // Any other operator with an empty right input: report the left input's nnz.
                lnnz = m1.nonZeros;
            }
        }
        return lnnz;
    }

    /**
     * Dense-dense-dense entry point: allocates the output if needed and selects the
     * kernel (vectorised plus/minus-multiply, contiguous, or generic) for rows
     * {@code [rl, ru)}.
     *
     * @return number of non-zeros reported by the selected kernel
     */
    private static long safeBinaryMMDenseDenseDense(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int numCols = m1.clen;
        final boolean plusOrMinusMultiply = op.fn instanceof PlusMultiply || op.fn instanceof MinusMultiply;
        if (!ret.isAllocated()) {
            ret.allocateDenseBlock();
        }
        final DenseBlock lhs = m1.getDenseBlock();
        final DenseBlock rhs = m2.getDenseBlock();
        final DenseBlock out = ret.getDenseBlock();

        // Wide rows with +* or -* use the vectorised kernel.
        if (plusOrMinusMultiply && numCols >= 64) {
            return LibMatrixBincell.safeBinaryMMDenseDenseDensePM_Vec(lhs, rhs, out, op, rl, ru, numCols);
        }
        final boolean allContiguous = lhs.isContiguous() && rhs.isContiguous() && out.isContiguous();
        if (!allContiguous) {
            return LibMatrixBincell.safeBinaryMMDenseDenseDenseGeneric(lhs, rhs, out, op, rl, ru, numCols);
        }
        return op.fn instanceof PlusMultiply
            ? LibMatrixBincell.safeBinaryMMDenseDenseDensePM(lhs, rhs, out, op, rl, ru, numCols)
            : LibMatrixBincell.safeBinaryMMDenseDenseDenseContiguous(lhs, rhs, out, op, rl, ru, numCols);
    }

    /**
     * Row-wise kernel for plus-multiply / minus-multiply: copies the left row into the
     * output and then adds the right row scaled by the operator constant (negated for
     * minus-multiply) via {@code LibMatrixMult.vectMultiplyAdd}.
     *
     * @return number of non-zeros in the written rows
     */
    private static final long safeBinaryMMDenseDenseDensePM_Vec(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        final double scale;
        if (op.fn instanceof PlusMultiply) {
            scale = ((PlusMultiply) op.fn).getConstant();
        } else {
            scale = -1.0 * ((MinusMultiply) op.fn).getConstant();
        }
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            final double[] lhs = da.values(row);
            final double[] rhs = db.values(row);
            final double[] out = dc.values(row);
            final int off = da.pos(row);
            System.arraycopy(lhs, off, out, off, clen);
            LibMatrixMult.vectMultiplyAdd(scale, rhs, out, off, off, clen);
            count += (long) UtilFunctions.computeNnz(out, off, clen);
        }
        return count;
    }

    /**
     * Plus-multiply kernel over the first value array of each block:
     * {@code c = a + constant * b} for the cell range {@code [da.pos(rl), da.pos(ru))}.
     *
     * @return number of non-zero results
     */
    private static final long safeBinaryMMDenseDenseDensePM(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        final double[] lhs = da.values(0);
        final double[] rhs = db.values(0);
        final double[] out = dc.values(0);
        final double factor = ((PlusMultiply) op.fn).getConstant();
        long count = 0L;
        int k = da.pos(rl);
        while (k < da.pos(ru)) {
            out[k] = lhs[k] + factor * rhs[k];
            if (out[k] != 0.0) {
                ++count;
            }
            ++k;
        }
        return count;
    }

    /**
     * Contiguous dense kernel: writes {@code op(a, b)} into the output for the cell range
     * {@code [da.pos(rl), da.pos(ru))} of the first value array of each block.
     *
     * <p>The output cell is overwritten, matching the generic and plus-multiply sibling
     * kernels; it must not accumulate onto whatever the output array held before.
     *
     * @param da left dense block
     * @param db right dense block
     * @param dc output dense block
     * @param op binary operator
     * @param rl first row (inclusive)
     * @param ru last row (exclusive)
     * @param clen number of columns (unused here; the range comes from {@code da.pos})
     * @return number of non-zero results
     */
    private static final long safeBinaryMMDenseDenseDenseContiguous(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        long lnnz = 0L;
        final double[] a = da.values(0);
        final double[] b = db.values(0);
        final double[] c = dc.values(0);
        for (int i = da.pos(rl); i < da.pos(ru); ++i) {
            c[i] = op.fn.execute(a[i], b[i]);
            lnnz += c[i] != 0.0 ? 1L : 0L;
        }
        return lnnz;
    }

    /**
     * Generic dense kernel: fetches the value arrays per row and writes
     * {@code op(a, b)} for {@code clen} cells starting at the left block's row offset.
     *
     * @return number of non-zero results
     */
    private static final long safeBinaryMMDenseDenseDenseGeneric(DenseBlock da, DenseBlock db, DenseBlock dc, BinaryOperator op, int rl, int ru, int clen) {
        final ValueFunction func = op.fn;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            final double[] lhs = da.values(row);
            final double[] rhs = db.values(row);
            final double[] out = dc.values(row);
            final int start = da.pos(row);
            final int stop = start + clen;
            for (int k = start; k < stop; ++k) {
                out[k] = func.execute(lhs[k], rhs[k]);
                if (out[k] != 0.0) {
                    ++count;
                }
            }
        }
        return count;
    }

    /**
     * Iterates only over the stored entries of the driving sparse input (m1 if it is
     * sparse, otherwise m2), looks up the matching cell of the other input, and appends
     * {@code op(sparseValue, otherValue)} whenever the other value is non-zero.
     *
     * @return number of non-zero results appended
     */
    private static long safeBinaryMMSparseDenseSkip(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final SparseBlock driver = m1.sparse ? m1.sparseBlock : m2.sparseBlock;
        if (driver == null) {
            return 0L;
        }
        final MatrixBlock other = m1.sparse ? m2 : m1;
        if (!ret.isAllocated()) {
            ret.allocateBlock();
        }
        long count = 0L;
        for (int row = rl; row < Math.min(ru, driver.numRows()); ++row) {
            if (driver.isEmpty(row)) {
                continue;
            }
            final int start = driver.pos(row);
            final int len = driver.size(row);
            final int[] idx = driver.indexes(row);
            final double[] vals = driver.values(row);
            if (ret.sparse && !other.sparse) {
                ret.sparseBlock.allocate(row, len);
            }
            final int stop = start + len;
            for (int k = start; k < stop; ++k) {
                final double otherVal = other.quickGetValue(row, idx[k]);
                if (otherVal != 0.0) {
                    final double result = op.fn.execute(vals[k], otherVal);
                    if (result != 0.0) {
                        ++count;
                    }
                    ret.appendValuePlain(row, idx[k], result);
                }
            }
        }
        return count;
    }

    /**
     * Cell-wise fallback for rows {@code [rl, ru)}: cells where both inputs are zero are
     * skipped, every other cell gets {@code op(in1, in2)} appended to {@code ret}.
     *
     * @return number of non-zero results appended
     */
    private static long safeBinaryMMGeneric(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int numCols = m2.clen;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            for (int col = 0; col < numCols; ++col) {
                final double lhs = m1.quickGetValue(row, col);
                final double rhs = m2.quickGetValue(row, col);
                final boolean bothZero = lhs == 0.0 && rhs == 0.0;
                if (!bothZero) {
                    final double result = op.fn.execute(lhs, rhs);
                    if (result != 0.0) {
                        ++count;
                    }
                    ret.appendValuePlain(row, col, result);
                }
            }
        }
        return count;
    }

    /**
     * Evaluates a comparison outer product between the column vector {@code m1} and the
     * row vector {@code m2}: for every left-hand value the column range in which the
     * comparison holds is located by binary search over {@code m2} and filled with 1.0.
     * Callers in this class only invoke it after {@code SortUtils.isSorted(m2)} succeeded.
     *
     * @param m1 left input; column 0 is read for every row
     * @param m2 right input, converted to a double vector and binary-searched
     * @param ret output block; its dense block is allocated if not yet allocated
     * @param bOp comparison operator (less/greater-than, their or-equal forms, equals,
     *        not-equals)
     * @return number of cells set to 1.0; also stored as the non-zero count of {@code ret}
     */
    private static long performBinOuterOperation(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator bOp) {
        int rlen = m1.rlen;
        int clen = ret.clen;
        double[] b = DataConverter.convertToDoubleVector(m2);
        if (!ret.isAllocated()) {
            ret.allocateDenseBlock();
        }
        DenseBlock dc = ret.getDenseBlock();

        // Pre-compute the operator classification used inside the row loop.
        boolean scanType1 = bOp.fn instanceof LessThan || bOp.fn instanceof Equals || bOp.fn instanceof NotEquals || bOp.fn instanceof GreaterThanEquals;
        boolean scanType2 = bOp.fn instanceof LessThanEquals || bOp.fn instanceof Equals || bOp.fn instanceof NotEquals || bOp.fn instanceof GreaterThan;
        boolean lt = bOp.fn instanceof LessThan;
        boolean lte = bOp.fn instanceof LessThanEquals;
        boolean gt = bOp.fn instanceof GreaterThan;
        boolean gte = bOp.fn instanceof GreaterThanEquals;
        boolean eqNeq = bOp.fn instanceof Equals || bOp.fn instanceof NotEquals;

        long lnnz = 0L;
        for (int bi = 0; bi < dc.numBlocks(); ++bi) {
            double[] c = dc.valuesAt(bi);
            for (int r = bi * dc.blockSize(), off = 0; r < rlen; ++r, off += clen) {
                double value = m1.quickGetValue(r, 0);
                int ixPos1 = Arrays.binarySearch(b, value);
                int ixPos2 = ixPos1;
                if (ixPos1 >= 0) {
                    // Exact match: widen to the run of equal values.
                    if (scanType1) {
                        // ixPos1 moves to the first position after the run.
                        while (ixPos1 < b.length && value == b[ixPos1]) {
                            ++ixPos1;
                        }
                    }
                    if (scanType2) {
                        // ixPos2 moves to the first position of the run.
                        while (ixPos2 > 0 && value == b[ixPos2 - 1]) {
                            --ixPos2;
                        }
                    }
                } else {
                    // No match: binarySearch returned -(insertionPoint) - 1.
                    ixPos2 = ixPos1 = Math.abs(ixPos1) - 1;
                }

                // Column range [start, end); both bounds are assigned on every path.
                int start = lt ? ixPos1 : (lte || eqNeq ? ixPos2 : 0);
                int end = gt ? ixPos2 : (gte || eqNeq ? ixPos1 : clen);

                if (bOp.fn instanceof NotEquals) {
                    // For != the range marks the equal cells; fill everything around it.
                    Arrays.fill(c, off, off + start, 1.0);
                    Arrays.fill(c, off + end, off + clen, 1.0);
                    lnnz += (long) (start + (clen - end));
                } else if (start < end) {
                    Arrays.fill(c, off + start, off + end, 1.0);
                    lnnz += (long) (end - start);
                }
            }
        }
        ret.setNonZeros(lnnz);
        ret.examSparsity();
        return lnnz;
    }

    /**
     * Cell-wise binary operation for rows {@code [rl, ru)} that evaluates every cell,
     * including those where both inputs are zero. The access type decides how the
     * right-hand side is indexed (column vector, row vector, outer product, or matrix).
     *
     * @return number of non-zero results
     */
    private static long unsafeBinary(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op, int rl, int ru) {
        final int numCols = m1.clen;
        final BinaryAccessType access = LibMatrixBincell.getBinaryAccessType(m1, m2);
        long count = 0L;

        if (access == BinaryAccessType.MATRIX_COL_VECTOR) {
            for (int row = rl; row < ru; ++row) {
                final double rhs = m2.quickGetValue(row, 0);
                for (int col = 0; col < numCols; ++col) {
                    final double lhs = m1.quickGetValue(row, col);
                    final double result = op.fn.execute(lhs, rhs);
                    ret.appendValuePlain(row, col, result);
                    if (result != 0.0) {
                        ++count;
                    }
                }
            }
            return count;
        }

        if (access == BinaryAccessType.MATRIX_ROW_VECTOR) {
            for (int row = rl; row < ru; ++row) {
                for (int col = 0; col < numCols; ++col) {
                    final double lhs = m1.quickGetValue(row, col);
                    final double rhs = m2.quickGetValue(0, col);
                    final double result = op.fn.execute(lhs, rhs);
                    ret.appendValuePlain(row, col, result);
                    if (result != 0.0) {
                        ++count;
                    }
                }
            }
            return count;
        }

        if (access == BinaryAccessType.OUTER_VECTOR_VECTOR) {
            final int outCols = m2.clen;
            if (LibMatrixOuterAgg.isCompareOperator(op) && m2.getNumColumns() > 16 && SortUtils.isSorted(m2)) {
                return LibMatrixBincell.performBinOuterOperation(m1, m2, ret, op);
            }
            for (int row = rl; row < ru; ++row) {
                final double lhs = m1.quickGetValue(row, 0);
                for (int col = 0; col < outCols; ++col) {
                    final double rhs = m2.quickGetValue(0, col);
                    final double result = op.fn.execute(lhs, rhs);
                    if (result != 0.0) {
                        ++count;
                    }
                    ret.appendValuePlain(row, col, result);
                }
            }
            return count;
        }

        // Matrix-matrix: single-column, dense, non-empty inputs work on the raw arrays.
        final boolean denseVectors = m1.clen == 1 && !m1.sparse && !m1.isEmptyBlock(false) && !m2.sparse && !m2.isEmptyBlock(false);
        if (denseVectors) {
            ret.allocateDenseBlock();
            final double[] lhs = m1.getDenseBlockValues();
            final double[] rhs = m2.getDenseBlockValues();
            final double[] out = ret.getDenseBlockValues();
            for (int k = rl; k < ru; ++k) {
                out[k] = op.fn.execute(lhs[k], rhs[k]);
                if (out[k] != 0.0) {
                    ++count;
                }
            }
            return count;
        }

        for (int row = rl; row < ru; ++row) {
            for (int col = 0; col < numCols; ++col) {
                final double lhs = m1.quickGetValue(row, col);
                final double rhs = m2.quickGetValue(row, col);
                final double result = op.fn.execute(lhs, rhs);
                ret.appendValuePlain(row, col, result);
                if (result != 0.0) {
                    ++count;
                }
            }
        }
        return count;
    }

    /**
     * Matrix-scalar operation for rows {@code [rl, ru)} that only touches stored values.
     * Input and output must use the same representation (both sparse or both dense).
     *
     * @return number of non-zeros produced; 0 for an empty input
     * @throws DMLRuntimeException if the input and output representations differ
     */
    private static long safeBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        if (m1.isEmptyBlock(false)) {
            return 0L;
        }
        if (m1.sparse != ret.sparse) {
            throw new DMLRuntimeException("Unsupported safe binary scalar operations over different input/output representation: " + m1.sparse + " " + ret.sparse);
        }
        // (X != 0) turns every stored value into 1, so rows can be copied structurally.
        final boolean copyOnes = op.fn instanceof NotEquals && op.getConstant() == 0.0;
        // For these operators the output row is pre-allocated to the input row length.
        final boolean allocExact = op.fn instanceof Multiply || op.fn instanceof Multiply2 || op.fn instanceof Power2 || Builtin.isBuiltinCode(op.fn, Builtin.BuiltinCode.MAX) || Builtin.isBuiltinCode(op.fn, Builtin.BuiltinCode.MIN);

        if (!m1.sparse) {
            return LibMatrixBincell.denseBinaryScalar(m1, ret, op, rl, ru);
        }

        ret.allocateSparseRowsBlock();
        final SparseBlock in = m1.sparseBlock;
        final SparseBlock out = ret.sparseBlock;
        long count = 0L;
        for (int row = rl; row < ru; ++row) {
            if (in.isEmpty(row)) {
                continue;
            }
            final int start = in.pos(row);
            final int len = in.size(row);
            final int[] idx = in.indexes(row);
            final double[] vals = in.values(row);

            if (copyOnes) {
                final SparseRowVector ones = new SparseRowVector(len);
                ones.setSize(len);
                System.arraycopy(idx, start, ones.indexes(), 0, len);
                Arrays.fill(ones.values(), 0, len, 1.0);
                out.set(row, ones, false);
                count += (long) len;
            } else {
                if (allocExact) {
                    out.allocate(row, len);
                }
                final int stop = start + len;
                for (int k = start; k < stop; ++k) {
                    final double result = op.executeScalar(vals[k]);
                    out.append(row, idx[k], result);
                    if (result != 0.0) {
                        ++count;
                    }
                }
            }
        }
        ret.nonZeros = count;
        return count;
    }

    /**
     * Matrix-scalar operation that evaluates every cell, including zeros, into a dense
     * output.
     *
     * <p>An empty input is answered by computing {@code op(0)} once and, if non-zero,
     * resetting the output to that value. A sparse input first initialises the dense
     * output with {@code op(0)} (when non-zero) and then overwrites the cells that have
     * stored input values.
     *
     * @param m1 input block
     * @param ret output block; must not be in sparse representation for non-empty input
     * @param op scalar operator
     * @return number of non-zeros in the result
     * @throws DMLRuntimeException if the input is non-empty and {@code ret} is sparse
     */
    private static long unsafeBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) {
        if (m1.isEmptyBlock(false)) {
            double val = op.executeScalar(0.0);
            if (val != 0.0) {
                ret.reset(ret.rlen, ret.clen, val);
            }
            return val != 0.0 ? ret.getLength() : 0L;
        }
        if (ret.sparse) {
            throw new DMLRuntimeException("Unsupported unsafe binary scalar operations over sparse output representation.");
        }
        int m = m1.rlen;
        int n = m1.clen;
        long lnnz = 0L;
        if (m1.sparse) {
            ret.allocateDenseBlock();
            SparseBlock a = m1.sparseBlock;
            DenseBlock dc = ret.getDenseBlock();
            double val0 = op.executeScalar(0.0);
            final boolean lsparseSafe = val0 == 0.0;
            if (!lsparseSafe) {
                dc.set(val0);
            }
            // Start from "all cells non-zero" when op(0) != 0. Widen before multiplying:
            // m * n in int arithmetic overflows for large blocks.
            long nnz = lsparseSafe ? 0L : (long) m * n;
            for (int bi = 0; bi < dc.numBlocks(); ++bi) {
                int blen = dc.blockSize(bi);
                double[] c = dc.valuesAt(bi);
                for (int i = bi * dc.blockSize(), cix = i * n; i < blen && i < m; ++i, cix += n) {
                    if (a.isEmpty(i)) {
                        continue;
                    }
                    int apos = a.pos(i);
                    int alen = a.size(i);
                    int[] aix = a.indexes(i);
                    double[] avals = a.values(i);
                    for (int j = apos; j < apos + alen; ++j) {
                        double val = op.executeScalar(avals[j]);
                        c[cix + aix[j]] = val;
                        // Count up from 0 when op(0) == 0, otherwise count down from m*n
                        // for every stored value that maps to 0.
                        nnz += lsparseSafe ? (long) (val != 0.0 ? 1 : 0) : (long) (val == 0.0 ? -1 : 0);
                    }
                }
            }
            lnnz = ret.nonZeros = nnz;
        } else {
            lnnz = LibMatrixBincell.denseBinaryScalar(m1, ret, op, 0, m);
        }
        return lnnz;
    }

    /**
     * Dense matrix-scalar kernel for rows {@code [rl, ru)}. Single-column inputs are
     * processed directly on the first value array; otherwise each row is processed via
     * its own value array and offset. The resulting count is stored in
     * {@code ret.nonZeros}.
     *
     * @return the value of {@code ret.nonZeros} after the update
     */
    private static long denseBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op, int rl, int ru) {
        ret.allocateDenseBlock(true);
        final DenseBlock in = m1.getDenseBlock();
        final DenseBlock out = ret.getDenseBlock();
        final int numCols = m1.clen;
        long count = 0L;
        if (numCols != 1) {
            for (int row = rl; row < ru; ++row) {
                final double[] src = in.values(row);
                final double[] dst = out.values(row);
                final int srcOff = in.pos(row);
                final int dstOff = out.pos(row);
                for (int col = 0; col < numCols; ++col) {
                    dst[dstOff + col] = op.executeScalar(src[srcOff + col]);
                    if (dst[dstOff + col] != 0.0) {
                        ++count;
                    }
                }
            }
        } else {
            final double[] src = in.valuesAt(0);
            final double[] dst = out.valuesAt(0);
            for (int row = rl; row < ru; ++row) {
                dst[row] = op.executeScalar(src[row]);
                if (dst[row] != 0.0) {
                    ++count;
                }
            }
        }
        ret.nonZeros = count;
        return ret.nonZeros;
    }

    /**
     * In-place binary operation {@code m1ret = op(m1ret, m2)}. Resolves the cases that
     * need no cell-wise work (empty operands) and otherwise dispatches to the
     * matrix-row-vector or matrix-matrix implementation.
     *
     * @param m1ret left input, overwritten with the result
     * @param m2 right input
     * @param op binary operator
     */
    private static void safeBinaryInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final boolean plusOrMinus = op.fn instanceof Plus || op.fn instanceof Minus;
        if ((m1ret.isEmpty() && m2.isEmpty()) || (plusOrMinus && m2.isEmpty())) {
            final boolean trueOnEqual = op.fn instanceof Equals || op.fn instanceof LessThanEquals || op.fn instanceof GreaterThanEquals;
            if (trueOnEqual) {
                // 0 == 0, 0 <= 0 and 0 >= 0 all hold, so every cell becomes 1.0.
                // The double literal selects the value-filling reset overload; a long
                // argument binds to a different (int, int, long) overload.
                m1ret.reset(m1ret.rlen, m1ret.clen, 1.0);
            }
            return;
        }
        if (m2.isEmpty() && (op.fn instanceof Multiply || op.fn instanceof And)) {
            // NOTE(review): long overload kept as is; presumably it resets to an empty
            // block, which equals an all-zero result - confirm.
            m1ret.reset(m1ret.rlen, m1ret.clen, 0L);
            return;
        }
        if (m1ret.getNumRows() > 1 && m2.getNumRows() == 1) {
            LibMatrixBincell.safeBinaryInPlaceMatrixRowVector(m1ret, m2, op);
        } else {
            LibMatrixBincell.safeBinaryInPlaceMatrixMatrix(m1ret, m2, op);
        }
    }

    /**
     * In-place matrix / row-vector operation. Chooses the sparse or dense target path and
     * within it the constant (empty vector) or vector variant.
     *
     * @throws DMLRuntimeException if the target is sparse, the vector is in sparse format
     *         and the operator is not row-safe on the left for it
     * @throws NotImplementedException if the vector is non-empty and sparse
     */
    private static void safeBinaryInPlaceMatrixRowVector(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (!m1ret.sparse) {
            // Dense target.
            if (!m1ret.isAllocated()) {
                LOG.warn("Allocating inplace output block");
                m1ret.allocateBlock();
            }
            if (m2.isEmpty()) {
                LibMatrixBincell.safeBinaryInPlaceDenseConst(m1ret, 0.0, op);
                return;
            }
            if (m2.sparse) {
                throw new NotImplementedException("Not made sparse vector inplace to dense " + op);
            }
            LibMatrixBincell.safeBinaryInPlaceDenseVector(m1ret, m2, op);
            return;
        }

        // Sparse target.
        if (m2.isInSparseFormat() && !op.isRowSafeLeft(m2)) {
            throw new DMLRuntimeException("Invalid row safety of inplace row operation: " + op);
        }
        if (m2.isEmpty()) {
            LibMatrixBincell.safeBinaryInPlaceSparseConst(m1ret, 0.0, op);
            return;
        }
        if (m2.sparse) {
            throw new NotImplementedException("Not made sparse vector inplace to sparse " + op);
        }
        LibMatrixBincell.safeBinaryInPlaceSparseVector(m1ret, m2, op);
    }

    /**
     * In-place matrix-matrix operation: picks the implementation that matches the
     * representations of both operands. Adding onto an empty, unallocated target is a
     * plain copy of {@code m2}.
     */
    private static void safeBinaryInPlaceMatrixMatrix(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (op.fn instanceof Plus && m1ret.isEmpty() && !m1ret.isAllocated()) {
            m1ret.copy(m2);
            return;
        }
        if (m1ret.sparse && m2.sparse) {
            LibMatrixBincell.safeBinaryInPlaceSparse(m1ret, m2, op);
            return;
        }
        if (!m1ret.sparse && !m2.sparse) {
            LibMatrixBincell.safeBinaryInPlaceDense(m1ret, m2, op);
            return;
        }
        final boolean plusOrMinus = op.fn instanceof Plus || op.fn instanceof Minus;
        if (m2.sparse && plusOrMinus) {
            LibMatrixBincell.safeBinaryInPlaceDenseSparseAdd(m1ret, m2, op);
            return;
        }
        LibMatrixBincell.safeBinaryInPlaceGeneric(m1ret, m2, op);
    }

    /**
     * In-place binary operation where both operands are in sparse representation. The
     * left block is converted to MCSR if it is not already one, each row is then merged,
     * appended or zeroed depending on which side has entries, and the non-zero count of
     * {@code m1ret} is recomputed at the end.
     *
     * @param m1ret left input, overwritten with the result
     * @param m2 right input
     * @param op binary operator
     */
    private static void safeBinaryInPlaceSparse(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (m1ret.sparseBlock != null) {
            m1ret.allocateSparseRowsBlock(false);
        }
        // The merge path below swaps whole rows via get/set, so the left block is
        // converted to MCSR when it has a different type.
        if (!(m1ret.sparseBlock instanceof SparseBlockMCSR)) {
            m1ret.sparseBlock = SparseBlockFactory.copySparseBlock(SparseBlock.Type.MCSR, m1ret.sparseBlock, false);
        }
        if (m2.sparseBlock != null) {
            m2.allocateSparseRowsBlock(false);
        }
        SparseBlock c = m1ret.sparseBlock;
        SparseBlock b = m2.sparseBlock;
        int rlen = m1ret.rlen;
        int clen = m1ret.clen;
        // Rows are compacted after the update only for multiply and logical-and.
        boolean compact = op.fn instanceof Multiply || op.fn instanceof And;
        boolean mcsr = c instanceof SparseBlockMCSR;
        if (c != null && b != null) {
            for (int r = 0; r < rlen; ++r) {
                if (c.isEmpty(r) && b.isEmpty(r)) continue;
                if (b.isEmpty(r)) {
                    // Only the left row has entries: combine it with an all-zero right row.
                    LibMatrixBincell.zeroRightForSparseBinary(op, r, m1ret);
                } else if (c.isEmpty(r)) {
                    // Only the right row has entries.
                    LibMatrixBincell.appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
                } else {
                    // Both rows have entries: detach the old left row, install a fresh row
                    // sized for the merged result, and merge old-left with right into it.
                    int estimateSize = Math.min(clen, (!c.isEmpty(r) ? c.size(r) : 0) + (!b.isEmpty(r) ? b.size(r) : 0));
                    SparseRow old = c.get(r);
                    c.set(r, new SparseRowVector(estimateSize), false);
                    // Take the detached row's entries out of the running nnz.
                    m1ret.nonZeros -= (long)old.size();
                    LibMatrixBincell.mergeForSparseBinary(op, old.values(), old.indexes(), 0, old.size(), b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
                }
                if (!compact || !mcsr || c.isEmpty(r)) continue;
                c.get(r).compact();
            }
        } else if (c == null) {
            // Left block missing: create it and append the right-hand rows.
            // NOTE(review): b is dereferenced without a null check here - confirm callers
            // never reach this branch with both sparse blocks null.
            m1ret.sparseBlock = SparseBlockFactory.createSparseBlock(rlen);
            for (int r = 0; r < rlen; ++r) {
                if (b.isEmpty(r)) continue;
                LibMatrixBincell.appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
            }
        } else {
            // Right block missing: every non-empty left row is combined with zeros.
            for (int r = 0; r < rlen; ++r) {
                if (c.isEmpty(r)) continue;
                LibMatrixBincell.zeroRightForSparseBinary(op, r, m1ret);
            }
        }
        m1ret.recomputeNonZeros();
    }

    /**
     * Replaces every stored value {@code v} of the sparse block of {@code m1ret} with
     * {@code op.fn.execute(v, m2)}. Returns immediately if {@code m1ret} is empty.
     *
     * <p>This method only rewrites the stored values; it does not update the non-zero count
     * of {@code m1ret} and does not remove entries that become zero.
     * NOTE(review): confirm that callers handle the non-zero count where required.
     *
     * @param m1ret sparse block that is modified in place
     * @param m2 constant right-hand operand
     * @param op binary operator whose {@code fn} is applied per stored value
     */
    private static void safeBinaryInPlaceSparseConst(MatrixBlock m1ret, double m2, BinaryOperator op) {
        if (m1ret.isEmpty()) {
            return;
        }
        final SparseBlock rows = m1ret.getSparseBlock();
        final int numRows = m1ret.rlen;
        int r = 0;
        while (r < numRows) {
            if (!rows.isEmpty(r)) {
                final int begin = rows.pos(r);
                final int end = rows.size(r) + begin;
                final double[] vals = rows.values(r);
                for (int k = begin; k < end; k++) {
                    vals[k] = op.fn.execute(vals[k], m2);
                }
            }
            r++;
        }
    }

    /**
     * Combines every stored value of the sparse block of {@code m1ret} with the entry of the
     * dense vector {@code m2} that is addressed by the value's column index, in place.
     * Returns immediately if {@code m1ret} is empty.
     *
     * <p>Compaction is performed only for multiply/and operators for which
     * {@code op.isIntroducingZerosRight(m2)} holds: per row for MCSR blocks, once at the end
     * otherwise. This method does not update the non-zero count of {@code m1ret}.
     *
     * @param m1ret sparse block that is modified in place
     * @param m2 right-hand operand, read through {@code getDenseBlockValues()}
     * @param op binary operator whose {@code fn} is applied per stored value
     */
    private static void safeBinaryInPlaceSparseVector(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (m1ret.isEmpty()) {
            return;
        }
        final SparseBlock rows = m1ret.getSparseBlock();
        final double[] vec = m2.getDenseBlockValues();
        final int numRows = m1ret.rlen;
        final boolean compact = (op.fn instanceof Multiply || op.fn instanceof And) && op.isIntroducingZerosRight(m2);
        final boolean mcsr = rows instanceof SparseBlockMCSR;
        for (int r = 0; r < numRows; r++) {
            if (rows.isEmpty(r)) {
                continue;
            }
            final int begin = rows.pos(r);
            final int end = rows.size(r) + begin;
            final double[] vals = rows.values(r);
            final int[] cols = rows.indexes(r);
            for (int k = begin; k < end; k++) {
                vals[k] = op.fn.execute(vals[k], vec[cols[k]]);
            }
            if (compact && mcsr) {
                final SparseRow row = rows.get(r);
                // NOTE(review): the row size is set to the length of the values array before
                // compacting - confirm this is intended rather than the previous row size.
                if (row instanceof SparseRowVector) {
                    ((SparseRowVector) row).setSize(vals.length);
                }
                row.compact();
            }
        }
        // NOTE(review): every non-MCSR block is cast to SparseBlockCSR here - confirm no other
        // sparse block type can reach this point.
        if (compact && !mcsr) {
            ((SparseBlockCSR) rows).compact();
        }
    }

    /**
     * In-place binary operation for two operands flagged dense. Allocates the dense block of
     * {@code m1ret} if it is not allocated yet and then picks a kernel: one for an empty
     * right-hand side, one specialised for plus, and a generic one for everything else.
     *
     * @param m1ret left-hand operand, which also receives the result
     * @param m2 right-hand operand
     * @param op binary operator whose {@code fn} is applied cell-wise
     */
    private static void safeBinaryInPlaceDense(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        if (!m1ret.isAllocated()) {
            m1ret.allocateDenseBlock();
        }
        if (m2.isEmptyBlock(false)) {
            LibMatrixBincell.safeBinaryInPlaceDenseEmpty(m1ret, op);
            return;
        }
        if (op.fn instanceof Plus) {
            LibMatrixBincell.safeBinaryInPlaceDensePlus(m1ret, m2, op);
            return;
        }
        LibMatrixBincell.safeBinaryInPlaceDenseGeneric(m1ret, m2, op);
    }

    /**
     * Replaces every cell {@code v} of the dense block of {@code m1ret} with
     * {@code op.fn.execute(v, 0.0)} and sets the non-zero count of {@code m1ret} to the
     * number of results that differ from zero.
     *
     * @param m1ret dense block that is modified in place
     * @param op binary operator whose {@code fn} is applied per cell
     */
    private static void safeBinaryInPlaceDenseEmpty(MatrixBlock m1ret, BinaryOperator op) {
        final DenseBlock dense = m1ret.getDenseBlock();
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        long nonZeros = 0L;
        for (int r = 0; r < numRows; r++) {
            final double[] vals = dense.values(r);
            final int rowStart = dense.pos(r);
            for (int ix = rowStart; ix < rowStart + numCols; ix++) {
                final double result = op.fn.execute(vals[ix], 0.0);
                vals[ix] = result;
                if (result != 0.0) {
                    nonZeros++;
                }
            }
        }
        m1ret.setNonZeros(nonZeros);
    }

    /**
     * Adds the dense block of {@code m2} to the dense block of {@code m1ret} in place and sets
     * the non-zero count of {@code m1ret} to the number of non-zero sums.
     *
     * <p>When both blocks are contiguous, the addition runs over the first
     * {@code rlen * clen} cells of the two backing arrays. The loop is bounded by the logical
     * cell count rather than by the length of the left backing array, so a backing array
     * that is larger than the logical matrix neither reads past the end of the right array
     * nor adds spare slots to the non-zero count.
     * NOTE(review): assumes a contiguous block stores its cells at offsets
     * {@code [0, rlen * clen)} of {@code values(0)} - confirm against the DenseBlock contract.
     *
     * @param m1ret left-hand operand, which also receives the result
     * @param m2 right-hand operand
     * @param op binary operator (not consulted here; the caller dispatches plus to this method)
     */
    private static void safeBinaryInPlaceDensePlus(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final DenseBlock a = m1ret.getDenseBlock();
        final DenseBlock b = m2.getDenseBlock();
        final int rlen = m1ret.rlen;
        final int clen = m1ret.clen;
        long lnnz = 0L;
        if (a.isContiguous() && b.isContiguous()) {
            final double[] avals = a.values(0);
            final double[] bvals = b.values(0);
            // Logical number of cells; the arrays themselves may be longer.
            final int len = rlen * clen;
            for (int i = 0; i < len; ++i) {
                final double sum = avals[i] + bvals[i];
                avals[i] = sum;
                if (sum != 0.0) {
                    lnnz++;
                }
            }
        } else {
            // Row-wise path: each row is addressed through its own array and offset.
            for (int r = 0; r < rlen; ++r) {
                final int aix = a.pos(r);
                final int bix = b.pos(r);
                final double[] avals = a.values(r);
                final double[] bvals = b.values(r);
                LibMatrixMult.vectAdd(bvals, avals, bix, aix, clen);
                lnnz += (long) UtilFunctions.computeNnz(avals, aix, clen);
            }
        }
        m1ret.setNonZeros(lnnz);
    }

    /**
     * Applies {@code op} cell-wise to two dense blocks, writes the result into {@code m1ret}
     * and sets its non-zero count to the number of results that differ from zero.
     *
     * <p>Each operand row is addressed through its own offset ({@code a.pos(r)} and
     * {@code b.pos(r)}), matching the row-wise path of the plus kernel, instead of reusing the
     * left-hand offset for the right-hand array.
     *
     * @param m1ret left-hand operand, which also receives the result
     * @param m2 right-hand operand
     * @param op binary operator whose {@code fn} is applied cell-wise
     */
    private static void safeBinaryInPlaceDenseGeneric(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final DenseBlock a = m1ret.getDenseBlock();
        final DenseBlock b = m2.getDenseBlock();
        final int rlen = m1ret.rlen;
        final int clen = m1ret.clen;
        long lnnz = 0L;
        for (int r = 0; r < rlen; ++r) {
            final double[] avals = a.values(r);
            final double[] bvals = b.values(r);
            final int aix = a.pos(r);
            final int bix = b.pos(r);
            for (int c = 0; c < clen; ++c) {
                final double result = op.fn.execute(avals[aix + c], bvals[bix + c]);
                avals[aix + c] = result;
                if (result != 0.0) {
                    lnnz++;
                }
            }
        }
        m1ret.setNonZeros(lnnz);
    }

    /**
     * Replaces every cell {@code v} of the dense block of {@code m1ret} with
     * {@code op.fn.execute(v, m2)} and sets the non-zero count of {@code m1ret} to the number
     * of results that differ from zero. {@code allocateDenseBlock()} is invoked on
     * {@code m1ret} first.
     *
     * @param m1ret dense block that is modified in place
     * @param m2 constant right-hand operand
     * @param op binary operator whose {@code fn} is applied per cell
     */
    private static void safeBinaryInPlaceDenseConst(MatrixBlock m1ret, double m2, BinaryOperator op) {
        m1ret.allocateDenseBlock();
        final DenseBlock dense = m1ret.getDenseBlock();
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        long nonZeros = 0L;
        for (int r = 0; r < numRows; r++) {
            final double[] vals = dense.values(r);
            final int rowStart = dense.pos(r);
            for (int ix = rowStart; ix < rowStart + numCols; ix++) {
                final double result = op.fn.execute(vals[ix], m2);
                vals[ix] = result;
                if (result != 0.0) {
                    nonZeros++;
                }
            }
        }
        m1ret.setNonZeros(nonZeros);
    }

    /**
     * Combines every cell in column {@code c} of the dense block of {@code m1ret} with entry
     * {@code c} of the dense values of {@code m2}, in place, and sets the non-zero count of
     * {@code m1ret} to the number of results that differ from zero.
     * {@code allocateDenseBlock()} is invoked on {@code m1ret} first.
     *
     * <p>The vector entry is selected by the column counter directly. Deriving it from the
     * physical offset ({@code ix % clen}) gives the same entry only when every row offset is a
     * multiple of {@code clen}, and costs a modulo per cell.
     *
     * @param m1ret dense block that is modified in place
     * @param m2 right-hand operand, read through {@code getDenseBlockValues()}
     * @param op binary operator whose {@code fn} is applied per cell
     */
    private static void safeBinaryInPlaceDenseVector(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        m1ret.allocateDenseBlock();
        final DenseBlock a = m1ret.getDenseBlock();
        final double[] b = m2.getDenseBlockValues();
        final int rlen = m1ret.rlen;
        final int clen = m1ret.clen;
        long lnnz = 0L;
        for (int r = 0; r < rlen; ++r) {
            final double[] avals = a.values(r);
            final int aix = a.pos(r);
            for (int c = 0; c < clen; ++c) {
                final double result = op.fn.execute(avals[aix + c], b[c]);
                avals[aix + c] = result;
                if (result != 0.0) {
                    lnnz++;
                }
            }
        }
        m1ret.setNonZeros(lnnz);
    }

    /**
     * Applies {@code op} to a dense left-hand block and the stored entries of a sparse
     * right-hand block, in place. Only cells that have a stored right-hand entry are touched;
     * the non-zero count of {@code m1ret} is adjusted incrementally for cells that switch
     * between zero and non-zero.
     *
     * <p>The dispatcher in this file routes only plus and minus here, for which untouched
     * cells keep their value. For the same reason a missing right-hand sparse block is treated
     * as "nothing to do". The dense target is allocated on demand, as in the dense-dense
     * kernel, so an unallocated target no longer fails with a {@link NullPointerException}.
     *
     * @param m1ret dense left-hand operand, which also receives the result
     * @param m2 sparse right-hand operand
     * @param op binary operator whose {@code fn} is applied per stored right-hand entry
     */
    private static void safeBinaryInPlaceDenseSparseAdd(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final SparseBlock b = m2.sparseBlock;
        if (b == null) {
            // No stored right-hand entries: no cell would be visited by the loop below.
            return;
        }
        // Allocate before reading the non-zero count so both refer to the same block state.
        if (!m1ret.isAllocated()) {
            m1ret.allocateDenseBlock();
        }
        final int rlen = m1ret.rlen;
        final DenseBlock a = m1ret.denseBlock;
        long nnz = m1ret.getNonZeros();
        for (int r = 0; r < rlen; ++r) {
            if (b.isEmpty(r)) {
                continue;
            }
            final int apos = a.pos(r);
            final int bpos = b.pos(r);
            final int blen = b.size(r);
            final int[] bix = b.indexes(r);
            final double[] avals = a.values(r);
            final double[] bvals = b.values(r);
            for (int k = bpos; k < bpos + blen; ++k) {
                final int target = apos + bix[k];
                final double vold = avals[target];
                final double vnew = op.fn.execute(vold, bvals[k]);
                // +1 when a zero cell becomes non-zero, -1 for the reverse, 0 otherwise.
                if (vold == 0.0 && vnew != 0.0) {
                    nnz++;
                } else if (vold != 0.0 && vnew == 0.0) {
                    nnz--;
                }
                avals[target] = vnew;
            }
        }
        m1ret.setNonZeros(nnz);
    }

    /**
     * Fallback kernel: visits every cell of {@code m1ret} in row-major order, reads both
     * operands through {@code quickGetValue} and stores the result through
     * {@code quickSetValue}.
     *
     * @param m1ret left-hand operand, which also receives the result
     * @param m2 right-hand operand
     * @param op binary operator whose {@code fn} is applied cell-wise
     */
    private static void safeBinaryInPlaceGeneric(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        for (int row = 0; row < numRows; row++) {
            for (int col = 0; col < numCols; col++) {
                final double left = m1ret.quickGetValue(row, col);
                final double right = m2.quickGetValue(row, col);
                m1ret.quickSetValue(row, col, op.fn.execute(left, right));
            }
        }
    }

    /**
     * In-place binary operation that visits every cell of {@code m1ret} through
     * {@code quickGetValue}/{@code quickSetValue}. The right-hand value is chosen according to
     * the access type returned by {@code getBinaryAccessType}: for
     * {@code MATRIX_COL_VECTOR} it is read from column 0 of the current row, for
     * {@code MATRIX_ROW_VECTOR} from row 0 of the current column, and otherwise from the same
     * cell of {@code m2}.
     *
     * @param m1ret left-hand operand, which also receives the result
     * @param m2 right-hand operand
     * @param op binary operator whose {@code fn} is applied cell-wise
     */
    private static void unsafeBinaryInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
        final int numRows = m1ret.rlen;
        final int numCols = m1ret.clen;
        final BinaryAccessType access = LibMatrixBincell.getBinaryAccessType(m1ret, m2);

        if (access == BinaryAccessType.MATRIX_COL_VECTOR) {
            for (int row = 0; row < numRows; row++) {
                // One right-hand value per row, reused for all columns.
                final double right = m2.quickGetValue(row, 0);
                for (int col = 0; col < numCols; col++) {
                    final double left = m1ret.quickGetValue(row, col);
                    m1ret.quickSetValue(row, col, op.fn.execute(left, right));
                }
            }
            return;
        }

        if (access == BinaryAccessType.MATRIX_ROW_VECTOR) {
            for (int row = 0; row < numRows; row++) {
                for (int col = 0; col < numCols; col++) {
                    final double left = m1ret.quickGetValue(row, col);
                    // One right-hand value per column, taken from row 0.
                    final double right = m2.quickGetValue(0, col);
                    m1ret.quickSetValue(row, col, op.fn.execute(left, right));
                }
            }
            return;
        }

        for (int row = 0; row < numRows; row++) {
            for (int col = 0; col < numCols; col++) {
                final double left = m1ret.quickGetValue(row, col);
                final double right = m2.quickGetValue(row, col);
                m1ret.quickSetValue(row, col, op.fn.execute(left, right));
            }
        }
    }

    /**
     * Merges two sparse rows, given as value/column-index arrays with an offset and a length,
     * into row {@code resultRow} of {@code result}. Both inputs are walked in parallel by
     * column index.
     *
     * <p>For {@link Multiply} only columns present in both rows produce an output entry, which
     * is appended directly to the sparse block of {@code result}; the non-zero count of
     * {@code result} is then increased by the size of the result row. For every other
     * operator, each column present in either row produces an output entry through
     * {@code result.appendValue}, with {@code 0.0} standing in for the missing side.
     *
     * @param op binary operator whose {@code fn} is applied per column
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 offset of the left row in {@code values1}/{@code cols1}
     * @param size1 number of entries of the left row
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 offset of the right row in {@code values2}/{@code cols2}
     * @param size2 number of entries of the right row
     * @param resultRow row of {@code result} that receives the merged entries
     * @param result output block
     */
    private static void mergeForSparseBinary(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, double[] values2, int[] cols2, int pos2, int size2, int resultRow, MatrixBlock result) {
        int left = 0;
        int right = 0;

        if (!(op.fn instanceof Multiply)) {
            // General case: emit an entry for every column that occurs on either side.
            while (left < size1 && right < size2) {
                final int leftCol = cols1[pos1 + left];
                final int rightCol = cols2[pos2 + right];
                if (leftCol < rightCol) {
                    result.appendValue(resultRow, leftCol, op.fn.execute(values1[pos1 + left], 0.0));
                    left++;
                } else if (leftCol == rightCol) {
                    result.appendValue(resultRow, leftCol, op.fn.execute(values1[pos1 + left], values2[pos2 + right]));
                    left++;
                    right++;
                } else {
                    result.appendValue(resultRow, rightCol, op.fn.execute(0.0, values2[pos2 + right]));
                    right++;
                }
            }
            // Whatever is left on one side is combined with zero.
            LibMatrixBincell.appendLeftForSparseBinary(op, values1, cols1, pos1, size1, left, resultRow, result);
            LibMatrixBincell.appendRightForSparseBinary(op, values2, cols2, pos2, size2, right, resultRow, result);
            return;
        }

        // Multiply: emit an entry only for columns that occur on both sides.
        if (result.getSparseBlock() == null) {
            result.allocateSparseRowsBlock();
        }
        final SparseBlock out = result.getSparseBlock();
        out.allocate(resultRow, Math.min(size1, size2), result.clen);
        while (left < size1 && right < size2) {
            final int leftCol = cols1[pos1 + left];
            final int rightCol = cols2[pos2 + right];
            if (leftCol == rightCol) {
                out.append(resultRow, leftCol, op.fn.execute(values1[pos1 + left], values2[pos2 + right]));
            }
            // Advance the side with the smaller column; advance both on a match.
            if (leftCol <= rightCol) {
                left++;
            }
            if (leftCol >= rightCol) {
                right++;
            }
        }
        result.nonZeros += (long) out.size(resultRow);
    }

    /**
     * Appends {@code op.fn.execute(v, 0.0)} to row {@code resultRow} of {@code result} for
     * every left-row entry {@code v} from relative position {@code pos} up to the end of the
     * row.
     *
     * @param op binary operator whose {@code fn} is applied per entry
     * @param values1 values of the left row
     * @param cols1 column indexes of the left row
     * @param pos1 offset of the left row in {@code values1}/{@code cols1}
     * @param size1 number of entries of the left row
     * @param pos relative position of the first entry to append
     * @param resultRow row of {@code result} that receives the entries
     * @param result output block
     */
    private static void appendLeftForSparseBinary(BinaryOperator op, double[] values1, int[] cols1, int pos1, int size1, int pos, int resultRow, MatrixBlock result) {
        final int end = pos1 + size1;
        int k = pos1 + pos;
        while (k < end) {
            final double combined = op.fn.execute(values1[k], 0.0);
            result.appendValue(resultRow, cols1[k], combined);
            k++;
        }
    }

    /**
     * Appends {@code op.fn.execute(0.0, v)} to row {@code r} of {@code ret} for every entry
     * {@code v} of the given right row, starting at its first entry.
     *
     * @param op binary operator whose {@code fn} is applied per entry
     * @param vals values of the right row
     * @param ix column indexes of the right row
     * @param pos offset of the right row in {@code vals}/{@code ix}
     * @param size number of entries of the right row
     * @param r row of {@code ret} that receives the entries
     * @param ret output block
     */
    private static void appendRightForSparseBinary(BinaryOperator op, double[] vals, int[] ix, int pos, int size, int r, MatrixBlock ret) {
        final int end = pos + size;
        for (int k = pos; k < end; k++) {
            final double combined = op.fn.execute(0.0, vals[k]);
            ret.appendValue(r, ix[k], combined);
        }
    }

    /**
     * Appends {@code op.fn.execute(0.0, v)} to row {@code r} of {@code result} for every
     * right-row entry {@code v} from relative position {@code pos} up to the end of the row.
     *
     * @param op binary operator whose {@code fn} is applied per entry
     * @param values2 values of the right row
     * @param cols2 column indexes of the right row
     * @param pos2 offset of the right row in {@code values2}/{@code cols2}
     * @param size2 number of entries of the right row
     * @param pos relative position of the first entry to append
     * @param r row of {@code result} that receives the entries
     * @param result output block
     */
    private static void appendRightForSparseBinary(BinaryOperator op, double[] values2, int[] cols2, int pos2, int size2, int pos, int r, MatrixBlock result) {
        final int end = pos2 + size2;
        int k = pos2 + pos;
        while (k < end) {
            final double combined = op.fn.execute(0.0, values2[k]);
            result.appendValue(r, cols2[k], combined);
            k++;
        }
    }

    /**
     * Combines every stored value of row {@code r} of the sparse block of {@code ret} with
     * {@code 0.0} on the right-hand side. Nothing is done for plus and minus. If at least one
     * result equals zero, the row is compacted afterwards.
     *
     * @param op binary operator whose {@code fn} is applied per stored value
     * @param r row to process
     * @param ret block whose sparse row is modified in place
     */
    private static void zeroRightForSparseBinary(BinaryOperator op, int r, MatrixBlock ret) {
        final boolean additive = op.fn instanceof Plus || op.fn instanceof Minus;
        if (additive) {
            return;
        }
        final SparseBlock block = ret.sparseBlock;
        final int begin = block.pos(r);
        final int count = block.size(r);
        final double[] vals = block.values(r);
        boolean producedZero = false;
        for (int k = begin; k < begin + count; k++) {
            final double combined = op.fn.execute(vals[k], 0.0);
            vals[k] = combined;
            if (combined == 0.0) {
                producedZero = true;
            }
        }
        if (producedZero) {
            block.compact(r);
        }
    }

    /**
     * Task that applies a unary operator to rows {@code [rl, ru)} of a dense input block,
     * writes the results to the same positions of a dense output block, and returns the
     * number of results that differ from zero.
     */
    private static class UncellTask
    implements Callable<Long> {
        private final DenseBlock _a;
        private final DenseBlock _c;
        private final UnaryOperator _op;
        private final int _rl;
        private final int _ru;

        /**
         * @param a dense input block
         * @param c dense output block
         * @param op unary operator whose {@code fn} is applied per cell
         * @param rl first row to process (inclusive)
         * @param ru last row to process (exclusive)
         */
        protected UncellTask(DenseBlock a, DenseBlock c, UnaryOperator op, int rl, int ru) {
            _a = a;
            _c = c;
            _op = op;
            _rl = rl;
            _ru = ru;
        }

        @Override
        public Long call() {
            if (_a.isContiguous(_rl, _ru)) {
                return applyToRange();
            }
            return applyRowByRow();
        }

        /** Processes the row range as one index range between the offsets of its bounds. */
        private long applyToRange() {
            final double[] in = _a.values(_rl);
            final double[] out = _c.values(_rl);
            final int from = _a.pos(_rl);
            final int to = _a.pos(_ru);
            long count = 0L;
            for (int i = from; i < to; i++) {
                final double result = _op.fn.execute(in[i]);
                out[i] = result;
                if (result != 0.0) {
                    count++;
                }
            }
            return count;
        }

        /** Processes each row through its own value arrays; offsets are taken from the input block. */
        private long applyRowByRow() {
            final int numCols = _a.getDim(1);
            long count = 0L;
            for (int row = _rl; row < _ru; row++) {
                final double[] in = _a.values(row);
                final double[] out = _c.values(row);
                final int offset = _a.pos(row);
                for (int col = 0; col < numCols; col++) {
                    final double result = _op.fn.execute(in[offset + col]);
                    out[offset + col] = result;
                    if (result != 0.0) {
                        count++;
                    }
                }
            }
            return count;
        }
    }

    /**
     * Task that runs {@code LibMatrixBincell.safeBinaryScalar} for rows {@code [rl, ru)} and
     * returns its result.
     */
    private static class BincellScalarTask
    implements Callable<Long> {
        private final MatrixBlock _m1;
        private final MatrixBlock _ret;
        private final ScalarOperator _sop;
        private final int _rl;
        private final int _ru;

        /**
         * @param m1 input block
         * @param ret output block
         * @param sop scalar operator to apply
         * @param rl lower row bound passed to the kernel
         * @param ru upper row bound passed to the kernel
         */
        protected BincellScalarTask(MatrixBlock m1, MatrixBlock ret, ScalarOperator sop, int rl, int ru) {
            _m1 = m1;
            _ret = ret;
            _sop = sop;
            _rl = rl;
            _ru = ru;
        }

        @Override
        public Long call() {
            return LibMatrixBincell.safeBinaryScalar(_m1, _ret, _sop, _rl, _ru);
        }
    }

    /**
     * Task that runs a matrix-matrix binary operation for a row range and returns the
     * kernel's result. {@code safeBinary} is used when the operator is flagged
     * {@code sparseSafe} or {@code isSparseSafeDivide} holds for the operator and the
     * right-hand operand; otherwise {@code unsafeBinary} is used.
     *
     * <p>All fields are private and final; the access type is assigned once in the
     * constructor like every other field.
     */
    private static class BincellTask
    implements Callable<Long> {
        private final MatrixBlock _m1;
        private final MatrixBlock _m2;
        private final MatrixBlock _ret;
        private final BinaryOperator _bop;
        private final BinaryAccessType _atype;
        private final int _rl;
        private final int _ru;

        /**
         * @param m1 left-hand operand
         * @param m2 right-hand operand
         * @param ret output block
         * @param bop binary operator to apply
         * @param atype access type forwarded to {@code safeBinary}
         * @param rl lower row bound passed to the kernel
         * @param ru upper row bound passed to the kernel
         */
        protected BincellTask(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator bop, BinaryAccessType atype, int rl, int ru) {
            this._m1 = m1;
            this._m2 = m2;
            this._ret = ret;
            this._bop = bop;
            this._atype = atype;
            this._rl = rl;
            this._ru = ru;
        }

        @Override
        public Long call() {
            if (this._bop.sparseSafe || LibMatrixBincell.isSparseSafeDivide(this._bop, this._m2)) {
                return LibMatrixBincell.safeBinary(this._m1, this._m2, this._ret, this._bop, this._atype, this._rl, this._ru);
            }
            // The unsafe kernel is called without the access type.
            return LibMatrixBincell.unsafeBinary(this._m1, this._m2, this._ret, this._bop, this._rl, this._ru);
        }
    }

    /**
     * Access types that a pair of binary-operation operands can be classified as.
     * The declaration order of the constants is unchanged.
     */
    public enum BinaryAccessType {
        MATRIX_MATRIX,
        MATRIX_COL_VECTOR,
        MATRIX_ROW_VECTOR,
        COL_VECTOR_MATRIX,
        ROW_VECTOR_MATRIX,
        OUTER_VECTOR_VECTOR,
        INVALID;

        /**
         * Tells whether this access type is one of the four matrix/vector combinations.
         *
         * @return {@code true} for {@code MATRIX_COL_VECTOR}, {@code MATRIX_ROW_VECTOR},
         *         {@code COL_VECTOR_MATRIX} and {@code ROW_VECTOR_MATRIX}; {@code false} otherwise
         */
        public boolean isMatrixVector() {
            return this == MATRIX_COL_VECTOR || this == MATRIX_ROW_VECTOR || this == COL_VECTOR_MATRIX || this == ROW_VECTOR_MATRIX;
        }
    }
}

