From 320924278d79df5ce7be62504423901f7509e818 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 9 May 2020 08:06:14 +0300 Subject: [PATCH] Legacy API changes (#441) * initial commit Signed-off-by: raver119@gmail.com * another initial commit Signed-off-by: raver119@gmail.com * another initial commit Signed-off-by: raver119@gmail.com * one more initial commit Signed-off-by: raver119@gmail.com * next step Signed-off-by: raver119@gmail.com * next step Signed-off-by: raver119@gmail.com * next step Signed-off-by: raver119@gmail.com * next step Signed-off-by: raver119@gmail.com * Refactored buffer() and shapeInfo() methods usage with NDArray class. Signed-off-by: shugeo * Adopt Graph class methods to use const shapes. Signed-off-by: shugeo * Adopt choose op to use constant shapes. Signed-off-by: shugeo * Adopt where op shape method to use constant shapes. Signed-off-by: shugeo * Adopt lstsq op to use constant empty shapes. Signed-off-by: shugeo * Adopt matrix_diag_part op shape routine to use constant shapes. Signed-off-by: shugeo * Adopt determinant ops to use constant shapes. Signed-off-by: shugeo * Adopt mean_pairwssqerr_loss ops to use constant shapes. Signed-off-by: shugeo * Adopt ops shape methods. Signed-off-by: shugeo * Adopt shape methods for loss ops. Signed-off-by: shugeo * Adopt log_loss op shape method. Signed-off-by: shugeo * Adopt shape methods for ops. Signed-off-by: shugeo * Adopt dilation2d ops shape methods. Signed-off-by: shugeo * Adopted deconv2d ops shape methods. Signed-off-by: shugeo * Adopted dynamicRNN op shape method. Signed-off-by: shugeo * Adopted shape methods for ops. Signed-off-by: shugeo * Adopted shape methods for lstm layer ops. Signed-off-by: shugeo * few updates Signed-off-by: raver119@gmail.com * first cuda tweak Signed-off-by: raver119@gmail.com * Adopt constant shapes for sconv2d ops. Signed-off-by: shugeo * Adopt constant shapes for gru ops. 
Signed-off-by: shugeo * Adopt constant shapes with shape methods for segment ops and so on. Signed-off-by: shugeo * Adopted constant shapes with unsorted_segment_* ops. Signed-off-by: shugeo * Adopted constant shapes with gamma op shape method. Signed-off-by: shugeo * Adopted shape methods of reduce_stddev ops. Signed-off-by: shugeo * Adopted shape methods for reduce_* ops. Signed-off-by: shugeo * Adopt shape method for squeeze op. Signed-off-by: shugeo * Adopt strided_slice shape method. Signed-off-by: shugeo * Refactored concat op shape method to adopt constant shapes. Signed-off-by: shugeo * Adopted shape method for mirror_pad op. Signed-off-by: shugeo * Adopted split op shape method. Signed-off-by: shugeo * Adopted tile ops shape methods. Signed-off-by: shugeo * Added const cast for mkldnn routines handles. Signed-off-by: shugeo * Refactored logSoftMaxForVector_ routine to conform with proper data and shape pointer casts. Signed-off-by: shugeo * Cosmetic changes to proper usage of constant pointers. Signed-off-by: shugeo * Refactored a couple shape comparators for strides and addBias helpers to proper use data pointers with inplace option. Signed-off-by: shugeo * Refactored depthToSpace helpers. Signed-off-by: shugeo * Refactored histogram helpers. Signed-off-by: shugeo * Refactored im2col helpers. Signed-off-by: shugeo * Refactored gather and gatherND helpers. Signed-off-by: shugeo * Fixed buffer usage on percentile helper. Signed-off-by: shugeo * Fixed gather shape with helpers and range buffer usage. Signed-off-by: shugeo * Fixed buffer usage with space to depth helpers. Signed-off-by: shugeo * Fixed buffer usage and constant shapes. Signed-off-by: shugeo * Fixed buffer usage with LUP decomposition. Signed-off-by: shugeo * Refactored onehot_ helper. Signed-off-by: shugeo * Refactored pad and prefix to use constant shapes. Signed-off-by: shugeo * Refactored softmax helpers. Signed-off-by: shugeo * Fixed space to batch helpers to use buffers properly. 
Signed-off-by: shugeo * Fixed stack and split helpers. Signed-off-by: shugeo * Fixed buffer usage with sparse to dense helpers. Signed-off-by: shugeo * Fixed buffer usage with mindistance_ helpers. Signed-off-by: shugeo * Fixed buffer usage with tile helper. Signed-off-by: shugeo * Fixed constant shape usage. Signed-off-by: shugeo * Fixed constant shape usage with legacy pairwise bool ops. Signed-off-by: shugeo * Refactored a couple of methods to adopt constant shape usage. Signed-off-by: shugeo * Fixed broadcasting with constant shape." Signed-off-by: shugeo * Fixed const usage with inplace reverse and constant shapes with legacy reduction. Signed-off-by: shugeo * Refactored legacy ops with const shapes. Signed-off-by: shugeo * Refactored sort to adopt constant shapes. Signed-off-by: shugeo * Corrected sort for constant shape usage. Signed-off-by: shugeo * Fixed constant shape usage with special methods. Signed-off-by: shugeo * Refactored Context to conform with constant shape usage. Signed-off-by: shugeo * CUDA broadcasting headers Signed-off-by: raver119@gmail.com * pairwise/indexreduce/random headers Signed-off-by: raver119@gmail.com * Refactored native ops to adopt constant shapes. Signed-off-by: shugeo * legacy reduce3/scalar headers Signed-off-by: raver119@gmail.com * Corrected pullRow signature and tests. Signed-off-by: shugeo * Corrected routines to proper use of constant shapes. Signed-off-by: shugeo * Refactored tests to use constant shapes properly. Signed-off-by: shugeo * Refactored legacy ops tests to use constant shapes properly. Signed-off-by: shugeo * Refactored buffer usage with NDArray tests. Signed-off-by: shugeo * Fixed native ops tests. Signed-off-by: shugeo * Fixed special concat routine. Signed-off-by: shugeo * Fixed buffer usage with test. Signed-off-by: shugeo * Fixed buffer usage with a test. Signed-off-by: shugeo * Refactored TAD.h and tests. Signed-off-by: shugeo * Refactored calcStrides* routines to use constant shapes. 
Signed-off-by: shugeo * Fixed miscellaneous errors with constant shapes. Signed-off-by: shugeo * NativeOps const changes Signed-off-by: raver119@gmail.com * Corrected definitions for declared functions. Signed-off-by: shugeo * NativeOps const changes Signed-off-by: raver119@gmail.com * few more const changes Signed-off-by: raver119@gmail.com * Fixed const shapes with shape routines. Signed-off-by: shugeo * few more const changes Signed-off-by: raver119@gmail.com * Fixed shape method for broadcastable case. Signed-off-by: shugeo * few more const changes Signed-off-by: raver119@gmail.com * xw_plus_b BP shape fn restored Signed-off-by: raver119@gmail.com * Fixed signatures with broadcasting. Signed-off-by: shugeo * Repaired backprops shape methods for a set of operations. Signed-off-by: shugeo * Refactored broadcast bool for cuda. Signed-off-by: shugeo * Refactored methods for 3 args with const qualifier. Signed-off-by: shugeo * Fixed a couple of kernel signatures for broadcasting. Signed-off-by: shugeo * Fixed kernels signatures for const buffers and shapes. Signed-off-by: shugeo * Refactored pairwise methods to persistent buffers and shapes usage. Signed-off-by: shugeo * Adopt const to buffers and shapes with kernels. Signed-off-by: shugeo * Adopt const to buffers and shapes with scalar kernels. Signed-off-by: shugeo * Refactored indexreduce kernels signatures to use const buffers and shapes. Signed-off-by: shugeo * Refactored pairwise kernels to adopt const shapes and buffers. Signed-off-by: shugeo * Refactored pairwise bool kernels to adopt const shapes and buffers. Signed-off-by: shugeo * Refactored random special ops to conform with const shapes and buffers. Signed-off-by: shugeo * Refactored native ops to conform with const shapes and buffers under cuda platform. Signed-off-by: shugeo * Cosmetic changes only. Signed-off-by: shugeo * Fixed const shapes and buffers error. Signed-off-by: shugeo * Corrected start pos routine. 
Signed-off-by: shugeo * Refactored methods to conform with const shapes and buffers. Signed-off-by: shugeo * Refactored helpers to use proper methods instead. Signed-off-by: shugeo * bunch of changes Signed-off-by: raver119@gmail.com * next bunch of changes Signed-off-by: raver119@gmail.com * next bunch of changes Signed-off-by: raver119@gmail.com * Fixed execScalar declaration. Signed-off-by: shugeo * Fixed execScalar declaration. Signed-off-by: shugeo * Corrected const shape cases with sort and so on. Signed-off-by: shugeo * Fixed const shapes for sort. Signed-off-by: shugeo * Refactored kernel declarations to adopt const shapes. Signed-off-by: shugeo * Fixed kernels declarations to adopt const shapes. Signed-off-by: shugeo * Corrected kernel declarations to adopt const shapes and buffers. Signed-off-by: shugeo * Fixed kernels declarations to adopt const shapes. Signed-off-by: shugeo * Fixed segment helpers kernels declarations and so on to adopt const shapes. Signed-off-by: shugeo * Fixed const shape usage with segment and solve helpers. Signed-off-by: shugeo * Fixed kernel declaration with adjustWeight helper. Signed-off-by: shugeo * Fixed cuda implementations for constant shape helpers. Signed-off-by: shugeo * Adopted const shape usage with kernels. Signed-off-by: shugeo * Adopted top_k kernels to use const shapes and buffers. Signed-off-by: shugeo * Corrected kernels declarations to adopt const shapes with helpers. Signed-off-by: shugeo * Refactored NDArray definitions to adopt const shapes and buffers. Signed-off-by: shugeo * Fixed const shapes with image suppression helpers. Signed-off-by: shugeo * Slight improvement with buffers. Signed-off-by: shugeo * Refactored buffer usage. Signed-off-by: shugeo * Refactored buffer usage with tests. Signed-off-by: shugeo * Fixed const shape usage with definitions. 
Signed-off-by: shugeo * minor updates on cpu side Signed-off-by: raver119@gmail.com * Refactored const shape usage with ConstantDescritor and native ops with cuda platform. Signed-off-by: shugeo * Refactored tear and tile kernels to adopt with const shapes. Signed-off-by: shugeo * softmax_loop fix Signed-off-by: raver119 * update missing signature Signed-off-by: raver119@gmail.com * softmax again Signed-off-by: raver119@gmail.com * few more missing consts Signed-off-by: raver119 * new methods updated Signed-off-by: raver119@gmail.com Co-authored-by: shugeo --- libnd4j/include/array/ConstantDescriptor.h | 2 +- libnd4j/include/array/NDArray.h | 110 ++- libnd4j/include/array/NDArray.hXX | 666 +++++++++-------- libnd4j/include/array/NDArrayLambda.hXX | 56 +- libnd4j/include/array/ShapeList.h | 18 +- libnd4j/include/array/TadPack.h | 16 +- libnd4j/include/array/cpu/NDArray.cpp | 70 +- libnd4j/include/array/cuda/NDArray.cu | 60 +- .../include/array/impl/ConstantDescriptor.cpp | 2 +- libnd4j/include/array/impl/NDArrayFactory.cpp | 2 +- libnd4j/include/array/impl/NDArrayList.cpp | 2 +- libnd4j/include/array/impl/ShapeList.cpp | 34 +- libnd4j/include/array/impl/TadPack.cpp | 13 +- libnd4j/include/graph/Context.h | 10 +- libnd4j/include/graph/impl/Context.cpp | 28 +- libnd4j/include/graph/impl/Graph.cpp | 48 +- libnd4j/include/graph/profiling/NodeProfile.h | 4 +- .../graph/profiling/impl/NodeProfile.cpp | 4 +- libnd4j/include/helpers/ConstantShapeHelper.h | 22 +- libnd4j/include/helpers/Loops.h | 77 +- libnd4j/include/helpers/ShapeUtils.h | 30 +- libnd4j/include/helpers/TAD.h | 50 +- .../helpers/cpu/ConstantShapeHelper.cpp | 43 +- libnd4j/include/helpers/cpu/MmulHelper.cpp | 38 +- .../helpers/cpu/loops/IndexReductionLoops.hpp | 12 +- .../cpu/loops/IndexReductionLoops_int32_0.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_1.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_2.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_3.cpp | 2 +- 
.../cpu/loops/IndexReductionLoops_int32_4.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_5.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_6.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_7.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_8.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int32_9.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_0.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_1.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_2.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_3.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_4.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_5.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_6.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_7.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_8.cpp | 2 +- .../cpu/loops/IndexReductionLoops_int64_9.cpp | 2 +- .../helpers/cpu/loops/Reduction3Loops_0.cpp | 8 +- .../helpers/cpu/loops/Reduction3Loops_1.cpp | 8 +- .../helpers/cpu/loops/Reduction3Loops_2.cpp | 8 +- .../helpers/cpu/loops/Reduction3Loops_3.cpp | 8 +- .../helpers/cpu/loops/ReductionLoops_bool.cpp | 11 +- .../cpu/loops/ReductionLoops_float_0.cpp | 10 +- .../cpu/loops/ReductionLoops_float_1.cpp | 11 +- .../cpu/loops/ReductionLoops_float_2.cpp | 8 +- .../cpu/loops/ReductionLoops_float_3.cpp | 8 +- .../helpers/cpu/loops/ReductionLoops_long.cpp | 8 +- .../helpers/cpu/loops/ReductionLoops_same.cpp | 8 +- .../helpers/cuda/ConstantShapeHelper.cu | 20 +- .../include/helpers/cuda_off/MmulHelper.cu | 56 +- libnd4j/include/helpers/impl/MmulHelper.cpp | 12 +- libnd4j/include/helpers/impl/ShapeUtils.cpp | 523 +++++++------ libnd4j/include/helpers/shape.h | 186 ++--- libnd4j/include/legacy/NativeOpExecutioner.h | 701 +++++++++--------- libnd4j/include/legacy/NativeOps.h | 318 ++++---- .../legacy/cpu/NativeOpExecutioner.cpp | 687 +++++++++-------- libnd4j/include/legacy/cpu/NativeOps.cpp | 453 +++++------ .../legacy/cuda/NativeOpExecutioner.cu | 478 ++++++------ 
libnd4j/include/legacy/cuda/NativeOps.cu | 323 ++++---- .../include/loops/BroadcastScalarConverter.h | 1 + libnd4j/include/loops/broadcasting.h | 163 ++-- libnd4j/include/loops/broadcasting_bool.h | 147 ++-- libnd4j/include/loops/broadcasting_int.h | 150 ++-- libnd4j/include/loops/cpu/broadcasting.hpp | 96 +-- .../include/loops/cpu/broadcasting_bool.hpp | 108 +-- .../include/loops/cpu/broadcasting_int.hpp | 100 +-- libnd4j/include/loops/cpu/indexreduce.hpp | 30 +- libnd4j/include/loops/cpu/pairwise.hpp | 63 +- libnd4j/include/loops/cpu/pairwise_bool.cpp | 71 +- libnd4j/include/loops/cpu/pairwise_int.cpp | 74 +- libnd4j/include/loops/cpu/random.hpp | 33 +- .../include/loops/cpu/reduce/reduce_bool.cpp | 71 +- .../include/loops/cpu/reduce/reduce_float.hpp | 71 +- .../include/loops/cpu/reduce/reduce_long.cpp | 74 +- .../include/loops/cpu/reduce/reduce_same.cpp | 74 +- libnd4j/include/loops/cpu/reduce3.hpp | 100 +-- libnd4j/include/loops/cpu/scalar.hpp | 86 +-- libnd4j/include/loops/cpu/scalar_bool.cpp | 100 ++- libnd4j/include/loops/cpu/scalar_int.cpp | 99 ++- .../include/loops/cpu/summarystatsreduce.cpp | 55 +- .../loops/cpu/transform/transform_any.cpp | 22 +- .../loops/cpu/transform/transform_bool.cpp | 25 +- .../loops/cpu/transform/transform_float.cpp | 25 +- .../loops/cpu/transform/transform_same.cpp | 18 +- .../loops/cpu/transform/transform_strict.cpp | 25 +- libnd4j/include/loops/cuda/broadcasting.chpp | 68 +- .../include/loops/cuda/broadcasting_bool.cu | 64 +- .../include/loops/cuda/broadcasting_int.cu | 76 +- libnd4j/include/loops/cuda/indexreduce.cu | 34 +- libnd4j/include/loops/cuda/pairwise.chpp | 18 +- libnd4j/include/loops/cuda/pairwise_bool.cu | 18 +- libnd4j/include/loops/cuda/pairwise_int.cu | 18 +- libnd4j/include/loops/cuda/random.cu | 103 ++- .../include/loops/cuda/reduce/reduce_bool.cu | 67 +- .../loops/cuda/reduce/reduce_float.chpp | 73 +- .../include/loops/cuda/reduce/reduce_long.cu | 80 +- .../include/loops/cuda/reduce/reduce_same.cu | 46 +- 
libnd4j/include/loops/cuda/reduce3.chpp | 128 ++-- libnd4j/include/loops/cuda/scalar.chpp | 35 +- libnd4j/include/loops/cuda/scalar_bool.cu | 76 +- libnd4j/include/loops/cuda/scalar_int.cu | 74 +- .../cuda/specials/bitonicArbitraryStep.cu | 12 +- .../loops/cuda/specials/bitonicSortStep.cu | 12 +- .../cuda/specials/fillDimensionalIsMax.cu | 28 +- .../include/loops/cuda/specials/fillIsMax.cu | 6 +- libnd4j/include/loops/cuda/specials/oesTad.cu | 24 +- .../loops/cuda/specials/pullRowsKernel.cu | 14 +- .../loops/cuda/specials/swapUnsafeKernel.cu | 8 +- .../include/loops/cuda/specials/tearKernel.cu | 20 +- .../include/loops/cuda/specials/tileKernel.cu | 20 +- .../include/loops/cuda/summarystatsreduce.cu | 24 +- .../loops/cuda/transform/transform_any.cu | 46 +- .../loops/cuda/transform/transform_bool.cu | 43 +- .../loops/cuda/transform/transform_float.cu | 50 +- .../loops/cuda/transform/transform_same.cu | 24 +- .../loops/cuda/transform/transform_strict.cu | 36 +- .../include/loops/cuda/type_conversions.cu | 26 +- .../include/loops/impl/type_conversions.cpp | 12 +- libnd4j/include/loops/indexreduce.h | 62 +- libnd4j/include/loops/pairwise_bool.h | 78 +- libnd4j/include/loops/pairwise_int.h | 78 +- libnd4j/include/loops/pairwise_transform.h | 78 +- libnd4j/include/loops/random.h | 52 +- libnd4j/include/loops/reduce3.h | 154 +++- libnd4j/include/loops/reduce_bool.h | 78 +- libnd4j/include/loops/reduce_float.h | 76 +- libnd4j/include/loops/reduce_long.h | 80 +- libnd4j/include/loops/reduce_same.h | 83 +-- libnd4j/include/loops/scalar.h | 78 +- libnd4j/include/loops/scalar_bool.h | 98 ++- libnd4j/include/loops/scalar_int.h | 100 ++- libnd4j/include/loops/special_kernels.h | 31 +- libnd4j/include/loops/summarystatsreduce.h | 65 +- libnd4j/include/loops/transform_any.h | 32 +- libnd4j/include/loops/transform_bool.h | 41 +- libnd4j/include/loops/transform_float.h | 72 +- libnd4j/include/loops/transform_same.h | 41 +- libnd4j/include/loops/transform_strict.h | 41 +- 
libnd4j/include/loops/type_conversions.h | 8 +- .../ops/declarable/generic/boolean/choose.cpp | 8 +- .../ops/declarable/generic/boolean/where.cpp | 10 +- .../generic/broadcastable/multiply.cpp | 16 +- .../generic/broadcastable/percentile.cpp | 5 +- .../declarable/generic/broadcastable/pow.cpp | 8 +- .../declarable/generic/compression/bitmap.cpp | 2 +- .../ops/declarable/generic/linalg/lstsq.cpp | 6 +- .../generic/linalg/matrixDiagPart.cpp | 15 +- .../generic/linalg/matrix_determinant.cpp | 6 +- .../ops/declarable/generic/linalg/qr.cpp | 4 +- .../generic/loss/absoluteDifference.cpp | 12 +- .../generic/loss/cosineDistance.cpp | 16 +- .../ops/declarable/generic/loss/hingeLoss.cpp | 14 +- .../ops/declarable/generic/loss/huberLoss.cpp | 24 +- .../ops/declarable/generic/loss/logLoss.cpp | 12 +- .../generic/loss/log_poisson_loss.cpp | 24 +- .../generic/loss/meanPairWsSqErr.cpp | 12 +- .../ops/declarable/generic/loss/meanSqErr.cpp | 14 +- .../generic/loss/sigmCrossEntropy.cpp | 12 +- .../generic/loss/softmaxCrossEntropy.cpp | 18 +- .../ops/declarable/generic/nn/batchnorm.cpp | 6 +- .../declarable/generic/nn/convo/conv1d.cpp | 6 +- .../declarable/generic/nn/convo/conv3d.cpp | 8 +- .../declarable/generic/nn/convo/deconv2d.cpp | 4 +- .../declarable/generic/nn/convo/deconv3d.cpp | 4 +- .../generic/nn/convo/depthwiseConv2d.cpp | 16 +- .../generic/nn/convo/dilation2d.cpp | 6 +- .../generic/nn/convo/pointwiseConv2d.cpp | 7 +- .../declarable/generic/nn/convo/sconv2d.cpp | 8 +- .../generic/nn/dot_product_attention.cpp | 6 +- .../generic/nn/pooling/maxpool2d.cpp | 4 +- .../generic/nn/pooling/maxpool3d.cpp | 2 +- .../generic/nn/pooling/pnormpool2d.cpp | 2 +- .../nn/recurrent/dynamicBidirectionalRNN.cpp | 8 +- .../generic/nn/recurrent/dynamicRNN.cpp | 4 +- .../declarable/generic/nn/recurrent/gru.cpp | 12 +- .../generic/nn/recurrent/lstmBlock.cpp | 2 +- .../generic/nn/recurrent/lstmBlockCell.cpp | 2 +- .../generic/nn/recurrent/lstmLayer.cpp | 26 +- 
.../generic/nn/recurrent/lstmLayerCell.cpp | 14 +- .../declarable/generic/nn/recurrent/sru.cpp | 10 +- .../nn/recurrent/staticBidirectionalRNN.cpp | 6 +- .../generic/nn/recurrent/staticRNN.cpp | 4 +- .../ops/declarable/generic/nn/relu_layer.cpp | 2 +- .../ops/declarable/generic/nn/xw_plus_b.cpp | 4 +- .../parity_ops/broadcast_dynamic_shape.cpp | 2 +- .../parity_ops/non_max_suppression.cpp | 6 +- .../non_max_suppression_overlaps.cpp | 3 +- .../generic/parity_ops/nth_element.cpp | 11 +- .../declarable/generic/parity_ops/onehot.cpp | 3 +- .../generic/parity_ops/segment_max.cpp | 4 +- .../generic/parity_ops/segment_mean.cpp | 5 +- .../generic/parity_ops/segment_min.cpp | 7 +- .../generic/parity_ops/segment_prod.cpp | 4 +- .../generic/parity_ops/segment_sum.cpp | 6 +- .../declarable/generic/parity_ops/unique.cpp | 4 +- .../parity_ops/unsorted_segment_max.cpp | 4 +- .../parity_ops/unsorted_segment_mean.cpp | 6 +- .../parity_ops/unsorted_segment_min.cpp | 6 +- .../parity_ops/unsorted_segment_prod.cpp | 6 +- .../parity_ops/unsorted_segment_sqrt_n.cpp | 6 +- .../parity_ops/unsorted_segment_sum.cpp | 4 +- .../declarable/generic/random/bernoulli.cpp | 2 +- .../ops/declarable/generic/random/gamma.cpp | 6 +- .../ops/declarable/generic/random/normal.cpp | 2 +- .../declarable/generic/reduce/reduceStDev.cpp | 5 +- .../generic/reduce/reduce_logsumexp.cpp | 2 +- .../declarable/generic/reduce/reduce_max.cpp | 2 +- .../declarable/generic/reduce/reduce_min.cpp | 2 +- .../generic/reduce/reduce_sqnorm.cpp | 2 +- .../declarable/generic/shape/reshape_as.cpp | 2 +- .../ops/declarable/generic/shape/squeeze.cpp | 4 +- .../generic/tensor/strided_slice.cpp | 11 +- .../declarable/generic/transforms/concat.cpp | 30 +- .../generic/transforms/mirrorPad.cpp | 23 +- .../generic/transforms/parallelStack.cpp | 2 +- .../declarable/generic/transforms/slice.cpp | 2 +- .../declarable/generic/transforms/split.cpp | 9 +- .../declarable/generic/transforms/stack.cpp | 2 +- 
.../declarable/generic/transforms/tile.cpp | 6 +- .../generic/updaters/adaDeltaUpdater.cpp | 8 +- .../generic/updaters/adaGradUpdater.cpp | 4 +- .../generic/updaters/adaMaxUpdater.cpp | 8 +- .../generic/updaters/adamUpdater.cpp | 8 +- .../generic/updaters/amsGradUpdater.cpp | 12 +- .../generic/updaters/nadamUpdater.cpp | 8 +- .../generic/updaters/nesterovsUpdater.cpp | 4 +- .../generic/updaters/rmsPropUpdater.cpp | 4 +- .../generic/util/print_affinity.cpp | 2 +- .../ops/declarable/helpers/compression.h | 2 +- .../ops/declarable/helpers/convolutions.h | 2 +- .../declarable/helpers/cpu/BarnesHutTsne.cpp | 14 +- .../declarable/helpers/cpu/activations.cpp | 24 +- .../ops/declarable/helpers/cpu/addBias.cpp | 64 +- .../ops/declarable/helpers/cpu/adjust_hue.cpp | 4 +- .../helpers/cpu/adjust_saturation.cpp | 4 +- .../declarable/helpers/cpu/batched_gemm.cpp | 4 +- .../ops/declarable/helpers/cpu/batchnorm.cpp | 42 +- .../ops/declarable/helpers/cpu/col2im.cpp | 18 +- .../declarable/helpers/cpu/compare_elem.cpp | 2 +- .../helpers/cpu/compression/compression.cpp | 2 +- .../helpers/cpu/compression/threshold.cpp | 2 +- .../helpers/cpu/convolutions_col2vol.cpp | 2 +- .../helpers/cpu/convolutions_vol2col.cpp | 2 +- .../ops/declarable/helpers/cpu/d_t_s.cpp | 2 +- .../ops/declarable/helpers/cpu/dilation2d.cpp | 6 +- .../ops/declarable/helpers/cpu/gather.cpp | 33 +- .../helpers/cpu/gatherTransforms.cpp | 22 +- .../ops/declarable/helpers/cpu/histogram.cpp | 6 +- .../ops/declarable/helpers/cpu/im2col.cpp | 11 +- .../declarable/helpers/cpu/imagesHelpers.cpp | 18 +- .../ops/declarable/helpers/cpu/ismax.cpp | 6 +- .../ops/declarable/helpers/cpu/lrn.cpp | 20 +- .../ops/declarable/helpers/cpu/lstsq.cpp | 2 +- .../ops/declarable/helpers/cpu/lup.cpp | 6 +- .../declarable/helpers/cpu/matrixSetDiag.cpp | 6 +- .../ops/declarable/helpers/cpu/merge.cpp | 8 +- .../declarable/helpers/cpu/nth_element.cpp | 2 +- .../ops/declarable/helpers/cpu/one_hot.cpp | 6 +- .../ops/declarable/helpers/cpu/pad.cpp | 
28 +- .../ops/declarable/helpers/cpu/percentile.cpp | 2 +- .../ops/declarable/helpers/cpu/prefix.cpp | 6 +- .../ops/declarable/helpers/cpu/random.cpp | 12 +- .../declarable/helpers/cpu/randomShuffle.cpp | 2 +- .../declarable/helpers/cpu/random_crop.cpp | 4 +- .../ops/declarable/helpers/cpu/range.cpp | 2 +- .../ops/declarable/helpers/cpu/reverse.cpp | 18 +- .../ops/declarable/helpers/cpu/s_t_b.cpp | 24 +- .../ops/declarable/helpers/cpu/s_t_d.cpp | 2 +- .../ops/declarable/helpers/cpu/scatter.cpp | 4 +- .../ops/declarable/helpers/cpu/softmax.cpp | 29 +- .../ops/declarable/helpers/cpu/split.cpp | 10 +- .../ops/declarable/helpers/cpu/stack.cpp | 15 +- .../ops/declarable/helpers/cpu/tile.cpp | 12 +- .../ops/declarable/helpers/cpu/top_k.cpp | 2 +- .../helpers/cpu/updaterAdaDelta.cpp | 24 +- .../declarable/helpers/cpu/updaterAdaGrad.cpp | 16 +- .../declarable/helpers/cpu/updaterAdaMax.cpp | 24 +- .../declarable/helpers/cpu/updaterAdam.cpp | 24 +- .../declarable/helpers/cpu/updaterAmsGrad.cpp | 32 +- .../declarable/helpers/cpu/updaterNadam.cpp | 24 +- .../helpers/cpu/updaterNesterovs.cpp | 16 +- .../declarable/helpers/cpu/updaterRmsProp.cpp | 16 +- .../declarable/helpers/cuda/BarnesHutTsne.cu | 18 +- .../declarable/helpers/cuda/activations.cu | 18 +- .../ops/declarable/helpers/cuda/addBias.cu | 4 +- .../ops/declarable/helpers/cuda/adjust_hue.cu | 14 +- .../helpers/cuda/adjust_saturation.cu | 14 +- .../declarable/helpers/cuda/batched_gemm.cu | 6 +- .../ops/declarable/helpers/cuda/batchnorm.cu | 6 +- .../ops/declarable/helpers/cuda/betaInc.cu | 2 +- .../ops/declarable/helpers/cuda/col2im.cu | 2 +- .../helpers/cuda/compression/compression.cu | 2 +- .../helpers/cuda/compression/threshold.cu | 12 +- .../ops/declarable/helpers/cuda/concat.cu | 25 +- .../ops/declarable/helpers/cuda/confusion.cu | 12 +- .../helpers/cuda/convolutions_col2vol.cu | 2 +- .../helpers/cuda/convolutions_pooling2d.cu | 12 +- .../helpers/cuda/convolutions_pooling2dBP.cu | 2 +- 
.../helpers/cuda/convolutions_pooling3d.cu | 2 +- .../helpers/cuda/convolutions_pooling3dBP.cu | 2 +- .../helpers/cuda/convolutions_upsampling2d.cu | 2 +- .../cuda/convolutions_upsampling2dBP.cu | 2 +- .../helpers/cuda/convolutions_upsampling3d.cu | 2 +- .../cuda/convolutions_upsampling3dBP.cu | 2 +- .../helpers/cuda/convolutions_vol2col.cu | 2 +- .../ops/declarable/helpers/cuda/cross.cu | 2 +- .../ops/declarable/helpers/cuda/d_t_s.cu | 11 +- .../ops/declarable/helpers/cuda/diGamma.cu | 2 +- .../ops/declarable/helpers/cuda/diag.cu | 8 +- .../ops/declarable/helpers/cuda/dilation2d.cu | 2 +- .../ops/declarable/helpers/cuda/dropout.cu | 10 +- .../ops/declarable/helpers/cuda/dynamic.cu | 72 +- .../helpers/cuda/extract_patches.cu | 6 +- .../helpers/cuda/fake_quantization.cu | 12 +- .../ops/declarable/helpers/cuda/flatten.cu | 8 +- .../ops/declarable/helpers/cuda/gather.cu | 4 +- .../ops/declarable/helpers/cuda/gather_nd.cu | 2 +- .../ops/declarable/helpers/cuda/hamming.cu | 6 +- .../ops/declarable/helpers/cuda/histogram.cu | 8 +- .../helpers/cuda/histogramFixedWidth.cu | 6 +- .../ops/declarable/helpers/cuda/im2col.cu | 2 +- .../helpers/cuda/image_draw_bounding_boxes.cu | 15 +- .../declarable/helpers/cuda/image_resize.cu | 36 +- .../helpers/cuda/image_suppression.cu | 12 +- .../declarable/helpers/cuda/imagesHelpers.cu | 38 +- .../ops/declarable/helpers/cuda/ismax.cu | 2 +- .../ops/declarable/helpers/cuda/lrn.cu | 12 +- .../ops/declarable/helpers/cuda/lstsq.cu | 4 +- .../ops/declarable/helpers/cuda/lup.cu | 131 ++-- .../declarable/helpers/cuda/matrixSetDiag.cu | 2 +- .../declarable/helpers/cuda/matrix_band.cu | 19 +- .../helpers/cuda/matrix_diag_part.cu | 12 +- .../declarable/helpers/cuda/max_pooling.cu | 2 +- .../ops/declarable/helpers/cuda/merge.cu | 95 +-- .../ops/declarable/helpers/cuda/meshgrid.cu | 8 +- .../declarable/helpers/cuda/nth_element.cu | 4 +- .../ops/declarable/helpers/cuda/one_hot.cu | 4 +- .../ops/declarable/helpers/cuda/pad.cu | 10 +- 
.../ops/declarable/helpers/cuda/percentile.cu | 9 +- .../ops/declarable/helpers/cuda/polyGamma.cu | 2 +- .../ops/declarable/helpers/cuda/prefix.cu | 6 +- .../declarable/helpers/cuda/print_variable.cu | 2 +- .../include/ops/declarable/helpers/cuda/qr.cu | 4 +- .../ops/declarable/helpers/cuda/random.cu | 19 +- .../ops/declarable/helpers/cuda/range.cu | 2 +- .../ops/declarable/helpers/cuda/reverse.cu | 29 +- .../ops/declarable/helpers/cuda/roll.cu | 24 +- .../ops/declarable/helpers/cuda/s_t_b.cu | 12 +- .../ops/declarable/helpers/cuda/s_t_d.cu | 13 +- .../ops/declarable/helpers/cuda/scatter.cu | 40 +- .../declarable/helpers/cuda/scatter_simple.cu | 10 +- .../declarable/helpers/cuda/scatter_update.cu | 6 +- .../ops/declarable/helpers/cuda/segment.cu | 12 +- .../declarable/helpers/cuda/segment_max.cu | 108 +-- .../declarable/helpers/cuda/segment_mean.cu | 80 +- .../declarable/helpers/cuda/segment_min.cu | 145 ++-- .../declarable/helpers/cuda/segment_prod.cu | 103 ++- .../declarable/helpers/cuda/segment_sqrtn.cu | 46 +- .../declarable/helpers/cuda/segment_sum.cu | 141 ++-- .../declarable/helpers/cuda/sequence_mask.cu | 6 +- .../ops/declarable/helpers/cuda/sg_cb.cu | 4 +- .../ops/declarable/helpers/cuda/solve.cu | 20 +- .../ops/declarable/helpers/cuda/split.cu | 18 +- .../ops/declarable/helpers/cuda/sru.cu | 4 +- .../ops/declarable/helpers/cuda/stack.cu | 30 +- .../ops/declarable/helpers/cuda/svd.cu | 22 +- .../ops/declarable/helpers/cuda/top_k.cu | 22 +- .../ops/declarable/helpers/cuda/transforms.cu | 42 +- .../helpers/cuda/triangular_solve.cu | 30 +- .../helpers/cuda/updaterAdaDelta.cu | 10 +- .../declarable/helpers/cuda/updaterAdaGrad.cu | 10 +- .../declarable/helpers/cuda/updaterAdaMax.cu | 10 +- .../declarable/helpers/cuda/updaterAdam.cu | 10 +- .../declarable/helpers/cuda/updaterAmsGrad.cu | 12 +- .../declarable/helpers/cuda/updaterNadam.cu | 10 +- .../helpers/cuda/updaterNesterovs.cu | 10 +- .../declarable/helpers/cuda/updaterRmsProp.cu | 8 +- 
.../ops/declarable/helpers/cuda/weights.cu | 12 +- .../ops/declarable/helpers/cuda/zeta.cu | 2 +- .../ops/declarable/helpers/dilation2d.h | 2 +- .../helpers/impl/knn_mindistance.cpp | 2 +- .../helpers/impl/sparse_to_dense.cpp | 2 +- .../ops/declarable/helpers/impl/where.cpp | 4 +- .../declarable/impl/BroadcastableBoolOp.cpp | 4 +- .../ops/declarable/impl/BroadcastableOp.cpp | 4 +- .../ops/declarable/impl/DeclarableOp.cpp | 6 +- .../declarable/impl/LegacyBroadcastBoolOp.cpp | 4 +- .../ops/declarable/impl/LegacyBroadcastOp.cpp | 4 +- .../declarable/impl/LegacyIndexReduceOp.cpp | 38 +- .../impl/LegacyPairwiseTransformBoolOp.cpp | 6 +- .../impl/LegacyPairwiseTransformOp.cpp | 6 +- .../ops/declarable/impl/LegacyRandomOp.cpp | 3 +- .../ops/declarable/impl/LegacyReduce3Op.cpp | 4 +- .../declarable/impl/LegacyReduceBoolOp.cpp | 18 +- .../declarable/impl/LegacyReduceFloatOp.cpp | 20 +- .../declarable/impl/LegacyReduceLongOp.cpp | 14 +- .../ops/declarable/impl/LegacyReduceOp.cpp | 12 +- .../declarable/impl/LegacyReduceSameOp.cpp | 18 +- .../declarable/impl/LegacyScalarBoolOp.cpp | 6 +- .../ops/declarable/impl/LegacyScalarOp.cpp | 6 +- .../ops/declarable/impl/LegacyStatsOp.cpp | 15 +- .../declarable/impl/LegacyTransformAnyOp.cpp | 4 +- .../declarable/impl/LegacyTransformBoolOp.cpp | 4 +- .../impl/LegacyTransformFloatOp.cpp | 4 +- .../ops/declarable/impl/LegacyTransformOp.cpp | 2 +- .../declarable/impl/LegacyTransformSameOp.cpp | 4 +- .../impl/LegacyTransformStrictOp.cpp | 2 +- .../declarable/platform/cudnn/avgpool2d.cu | 2 +- .../declarable/platform/cudnn/avgpool3d.cu | 2 +- .../declarable/platform/cudnn/batchnorm.cu | 32 +- .../ops/declarable/platform/cudnn/conv2d.cu | 10 +- .../ops/declarable/platform/cudnn/conv3d.cu | 10 +- .../declarable/platform/cudnn/cudnnUtils.cu | 12 +- .../platform/cudnn/depthwiseConv2d.cu | 10 +- .../declarable/platform/cudnn/maxpool2d.cu | 2 +- .../declarable/platform/cudnn/maxpool3d.cu | 2 +- .../declarable/platform/mkldnn/batchnorm.cpp | 18 +- 
.../ops/declarable/platform/mkldnn/conv2d.cpp | 12 +- .../ops/declarable/platform/mkldnn/conv3d.cpp | 14 +- .../declarable/platform/mkldnn/deconv2d.cpp | 12 +- .../platform/mkldnn/deconv2d_tf.cpp | 2 +- .../declarable/platform/mkldnn/deconv3d.cpp | 12 +- .../platform/mkldnn/depthwiseConv2d.cpp | 12 +- .../declarable/platform/mkldnn/lstmLayer.cpp | 6 +- .../ops/declarable/platform/mkldnn/matmul.cpp | 8 +- .../platform/mkldnn/mkldnnUtils.cpp | 34 +- .../declarable/platform/mkldnn/softmax.cpp | 4 +- .../ops/declarable/platform/mkldnn/tanh.cpp | 4 +- .../declarable/platform/mkldnn/xw_plus_b.cpp | 10 +- libnd4j/include/ops/impl/specials_double.hpp | 16 +- libnd4j/include/ops/impl/specials_single.hpp | 48 +- libnd4j/include/ops/ops.h | 40 +- libnd4j/include/ops/random_ops.h | 4 +- libnd4j/include/ops/special_random_ops.h | 32 +- libnd4j/include/ops/specials.h | 32 +- libnd4j/include/ops/specials_cuda.h | 18 +- .../layers_tests/BroadcastableOpsTests.cpp | 18 +- .../tests_cpu/layers_tests/BrodcastTests.cpp | 3 +- .../layers_tests/ConvolutionTests1.cpp | 6 +- .../layers_tests/CudaBasicsTests1.cu | 390 +++++----- .../layers_tests/DeclarableOpsTests1.cpp | 40 +- .../layers_tests/DeclarableOpsTests11.cpp | 2 +- .../layers_tests/DeclarableOpsTests12.cpp | 16 +- .../layers_tests/DeclarableOpsTests13.cpp | 12 +- .../layers_tests/DeclarableOpsTests6.cpp | 2 +- .../layers_tests/DeclarableOpsTests9.cpp | 14 +- libnd4j/tests_cpu/layers_tests/EmptyTests.cpp | 2 +- .../tests_cpu/layers_tests/HelpersTests1.cpp | 6 +- .../layers_tests/JavaInteropTests.cpp | 203 ++--- .../tests_cpu/layers_tests/LegacyOpsTests.cpp | 22 +- .../layers_tests/MultiDataTypeTests.cpp | 2 +- .../layers_tests/NDArrayConstructorsTests.cu | 8 +- .../layers_tests/NDArrayCudaBasicsTests.cu | 54 +- .../tests_cpu/layers_tests/NDArrayTests.cpp | 42 +- .../tests_cpu/layers_tests/NDArrayTests2.cpp | 32 +- .../tests_cpu/layers_tests/NativeOpsTests.cpp | 47 +- .../tests_cpu/layers_tests/PairwiseTests.cpp | 4 +- 
libnd4j/tests_cpu/layers_tests/RNGTests.cpp | 4 +- .../tests_cpu/layers_tests/ReduceTests.cpp | 157 ---- .../layers_tests/ShapeUtilsTests.cpp | 24 +- .../layers_tests/SparseUtilsTest.cpp | 2 +- libnd4j/tests_cpu/layers_tests/TadTests.cpp | 42 +- 470 files changed, 7521 insertions(+), 7468 deletions(-) delete mode 100644 libnd4j/tests_cpu/layers_tests/ReduceTests.cpp diff --git a/libnd4j/include/array/ConstantDescriptor.h b/libnd4j/include/array/ConstantDescriptor.h index 589ba2353..89e36c2a9 100644 --- a/libnd4j/include/array/ConstantDescriptor.h +++ b/libnd4j/include/array/ConstantDescriptor.h @@ -35,7 +35,7 @@ namespace sd { std::vector _floatValues; public: ConstantDescriptor(double* values, int length); - ConstantDescriptor(Nd4jLong* values, int length); + ConstantDescriptor(Nd4jLong const* values, int length); ConstantDescriptor(std::initializer_list values); explicit ConstantDescriptor(std::vector &values); diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index 6ab301200..7936f6688 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -125,7 +125,7 @@ namespace sd { void templatedDoubleAssign(void *xBuffer, const Nd4jLong xOffset, const void *yBuffer, const Nd4jLong yOffset) const; template - FORCEINLINE R templatedGet(void *buffer, const Nd4jLong index) const; + FORCEINLINE R templatedGet(void const* buffer, const Nd4jLong index) const; /* template R templatedGetIndex(void *buffer, Nd4jLong *indices) const; @@ -193,7 +193,7 @@ namespace sd { #ifndef __JAVACPP_HACK__ NDArray(std::shared_ptr buffer, const ShapeDescriptor& descriptor, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const Nd4jLong offset = 0); - NDArray(std::shared_ptr buffer, const char order, const std::vector &shape, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); + NDArray(std::shared_ptr buffer, char order, const std::vector &shape, sd::LaunchContext* context = 
sd::LaunchContext::defaultContext()); /** * This contructors create scalar array containing string utf8 @@ -250,13 +250,14 @@ namespace sd { /** * do not allocate memory, memory for array is passed from outside */ - NDArray(void *buffer, Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false); + NDArray(void *buffer, Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isBuffAlloc = false); + NDArray(void *buffer, const Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isBuffAlloc = false); /** * do not allocate memory, memory for array is passed from outside * we suppose the content of both (device and host) buffers is identical */ - NDArray(void *buffer, void *bufferD, Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false, const bool isBuffDAlloc = false); + NDArray(void *buffer, void *bufferD, const Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isBuffAlloc = false, bool isBuffDAlloc = false); /** * copy constructor @@ -277,28 +278,28 @@ namespace sd { /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently */ - NDArray(Nd4jLong* shapeInfo, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool nullify = true); + NDArray(const Nd4jLong* shapeInfo, bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool nullify = true); /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to be zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently * set dtype as array 
type */ - NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool nullify = true); + NDArray(const Nd4jLong* shapeInfo, sd::DataType dtype, bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool nullify = true); /** * this constructor creates new array using shape information contained in vector argument */ - NDArray(const char order, const std::vector &shape, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); + NDArray(char order, const std::vector &shape, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); /** * This constructor creates new array with elements copied from data and using shape information stored in shape, elements from data will be casted to dtype */ - NDArray(const char order, const std::vector &shape, const std::vector& data, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); + NDArray(char order, const std::vector &shape, const std::vector& data, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); /** * this constructor creates new array using given buffer (without memory allocation) and shape information stored in shape */ - NDArray(void *buffer, const char order, const std::vector &shape, sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false); + NDArray(void *buffer, char order, const std::vector &shape, sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false); /** * This method returns new array with the same shape & data type @@ -317,12 +318,12 @@ namespace sd { * this constructor creates new NDArray with shape matching "other" array, * doesn't copy "other" elements into new array !!! 
*/ - explicit NDArray(const NDArray* other, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext ::defaultContext()); + explicit NDArray(const NDArray* other, bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext ::defaultContext()); /** * this constructor creates scalar(and set its value = 0) or empty array depending on bool argument isScalar */ - NDArray(sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isScalar = true); + NDArray(sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isScalar = true); /** * This method blocks until asynchronous operation finishes @@ -364,9 +365,11 @@ namespace sd { * @param offset * @return */ - void *bufferWithOffset(Nd4jLong offset) const; + void const* bufferWithOffset(Nd4jLong offset) const; + void* bufferWithOffset(Nd4jLong offset); - void* specialBufferWithOffset(Nd4jLong offset) const; + void const* specialBufferWithOffset(Nd4jLong offset) const; + void* specialBufferWithOffset(Nd4jLong offset); /** * copy assignment operator * in particular, when _dataType != other._dataType and both shapes are the same, there will be allocation of new _buffer and _dataType acquires other._dataType @@ -450,38 +453,39 @@ namespace sd { /** * returns host buffer */ - FORCEINLINE void* getBuffer() const; FORCEINLINE void* buffer(); + FORCEINLINE const void* buffer() const; /** * returns buffer offset (offset is the same for host and device buffers) */ - FORCEINLINE Nd4jLong getBufferOffset() const; - FORCEINLINE Nd4jLong bufferOffset(); + FORCEINLINE Nd4jLong bufferOffset() const; /** * if _bufferD==nullptr return _buffer, else return _bufferD */ void* specialBuffer(); - void* getSpecialBuffer() const; + const void* specialBuffer() const; /** * returns device buffer if compilation is for cuda case, otherwise returns host buffer */ - void* getPlatformBuffer() const; void* platformBuffer(); + const void* 
platformBuffer() const; template - T* bufferAsT() const; + T* bufferAsT(); + + template + const T* bufferAsT() const; /** * returns _shapeInfo */ - FORCEINLINE Nd4jLong* shapeInfo(); - FORCEINLINE Nd4jLong* getShapeInfo() const; + FORCEINLINE const Nd4jLong* shapeInfo() const; /** @@ -493,12 +497,9 @@ namespace sd { /** * if _shapeInfoD==nullptr return _shapeInfo, else return _shapeInfoD */ - FORCEINLINE Nd4jLong* specialShapeInfo(); - FORCEINLINE Nd4jLong* getSpecialShapeInfo() const; + FORCEINLINE const Nd4jLong* specialShapeInfo() const; - - Nd4jLong* platformShapeInfo(); - Nd4jLong* getPlatformShapeInfo() const; + const Nd4jLong* platformShapeInfo() const; /** * permutes (in-place) the dimensions in array according to "dimensions" array @@ -1509,8 +1510,8 @@ bool NDArray::isAttached() { } template -FORCEINLINE R NDArray::templatedGet(void *buffer, Nd4jLong index) const { - auto b = reinterpret_cast(buffer); +FORCEINLINE R NDArray::templatedGet(void const* buffer, Nd4jLong index) const { + auto b = reinterpret_cast(buffer); auto v = static_cast(b[index]); return v; } @@ -1625,9 +1626,9 @@ bool NDArray::nonNull() const { return true; if(!Environment::getInstance()->isCPU()) - return getDataBuffer()->special() != nullptr && getSpecialShapeInfo() != nullptr; + return getDataBuffer()->special() != nullptr && specialShapeInfo() != nullptr; - return getDataBuffer()->primary() != nullptr && getShapeInfo() != nullptr; + return getDataBuffer()->primary() != nullptr && shapeInfo() != nullptr; } ////////////////////////////////////////////////////////////////////////// @@ -1744,7 +1745,7 @@ bool NDArray::isEmpty() const { if (this->_shapeInfo == nullptr) return false; - return ArrayOptions::arrayType(this->getShapeInfo()) == ArrayType::EMPTY; + return ArrayOptions::arrayType(this->shapeInfo()) == ArrayType::EMPTY; } ////////////////////////////////////////////////////////////////////////// @@ -1804,7 +1805,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j) { 
syncToHost(); Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickWriteHost(); return *(reinterpret_cast(bufferWithOffset(offset))); } @@ -1821,7 +1822,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { syncToHost(); Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickWriteHost(); return *(reinterpret_cast(bufferWithOffset(offset))); } @@ -1838,7 +1839,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLo syncToHost(); Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickWriteHost(); return *(reinterpret_cast(bufferWithOffset(offset))); } @@ -1856,7 +1857,7 @@ T NDArray::t(const Nd4jLong i) const { syncToHost(); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); + return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } //////////////////////////////////////////////////////////////////////// @@ -1872,9 +1873,9 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { syncToHost(); Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(offset))); } template @@ -1889,9 +1890,9 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { syncToHost(); Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(offset))); } template @@ -1906,9 +1907,9 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong 
j) const { syncToHost(); Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(offset))); } #ifndef __JAVACPP_HACK__ @@ -1924,8 +1925,7 @@ std::shared_ptr NDArray::dataBuffer() { #endif //////////////////////////////////////////////////////////////////////// -void* NDArray::getBuffer() const { - +const void* NDArray::buffer() const { return _buffer->primary() != nullptr ? static_cast(_buffer->primary()) + (_offset * sizeOfT()) : nullptr; } @@ -1934,18 +1934,13 @@ void* NDArray::buffer() { return _buffer->primary() != nullptr ? static_cast(_buffer->primary()) + (_offset * sizeOfT()) : nullptr; } -//////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::getShapeInfo() const { - return _shapeInfo; -} - ////////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::shapeInfo() { +const Nd4jLong* NDArray::shapeInfo() const { return _shapeInfo; } //////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::specialShapeInfo() { +const Nd4jLong* NDArray::specialShapeInfo() const { if (_shapeInfoD == nullptr) return _shapeInfo; // FIXME: this should be fixed once CUDA backend added @@ -1953,23 +1948,10 @@ Nd4jLong* NDArray::specialShapeInfo() { } //////////////////////////////////////////////////////////////////////// -Nd4jLong NDArray::getBufferOffset() const { +Nd4jLong NDArray::bufferOffset() const { return _offset; } -//////////////////////////////////////////////////////////////////////// -Nd4jLong NDArray::bufferOffset() { - return _offset; -} - -//////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::getSpecialShapeInfo() const{ - if (_shapeInfoD == nullptr) - return _shapeInfo; - // FIXME: this should be fixed once CUDA 
backend added - return _shapeInfoD; -} - #if defined(__CUDACC__) //&& defined(BUILD_TESTS) // for CUDA we need stil stuff inline diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 7756fb7ae..42f5f47f3 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -143,7 +143,7 @@ NDArray::NDArray(void* buffer, const char order, const std::vector &sh //////////////////////////////////////////////////////////////////////// // creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros -NDArray::NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides, sd::LaunchContext * context, const bool nullify) { +NDArray::NDArray(const Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides, sd::LaunchContext * context, const bool nullify) { if (shapeInfo == nullptr) throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo"); @@ -213,72 +213,76 @@ NDArray::NDArray(sd::LaunchContext * context) { _length = 0; } -//////////////////////////////////////////////////////////////////////// -// creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros, set dtype as array type -NDArray::NDArray(Nd4jLong* shapeInfo, const bool copyStrides, sd::LaunchContext * context, const bool nullify): - NDArray(shapeInfo, ArrayOptions::dataType(shapeInfo), copyStrides, context) { -} - -//////////////////////////////////////////////////////////////////////// -NDArray::NDArray(std::shared_ptr buffer, const ShapeDescriptor& descriptor, sd::LaunchContext* context, const Nd4jLong offset) { - - _context = context; - _offset = offset; - - setShapeInfo(descriptor); - - _buffer = buffer; - - _isView = offset > 0 || _length * DataTypeUtils::sizeOf(_dataType) < buffer->getLenInBytes(); -} - -//////////////////////////////////////////////////////////////////////// -// do not allocate 
memory, memory for array is passed from outside -NDArray::NDArray(void *buffer, Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc) { - - if (buffer == nullptr && ArrayOptions::arrayType(shapeInfo) != ArrayType::EMPTY) - throw std::runtime_error("NDArray constructor: can't be initalized with nullptr buffer !"); - - if (shapeInfo == nullptr) - throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo !"); - - if ((int) shapeInfo[0] > MAX_RANK) - throw std::invalid_argument("NDArray constructor: rank of NDArray can't exceed 32 !"); - - _context = context; - _isAttached = getContext()->getWorkspace() != nullptr; - _offset = 0; - - setShapeInfo(ShapeDescriptor(shapeInfo)); - - if (this->isEmpty()) { - tickReadDevice(); - tickReadHost(); + //////////////////////////////////////////////////////////////////////// + // creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros, set dtype as array type + NDArray::NDArray(const Nd4jLong* shapeInfo, const bool copyStrides, sd::LaunchContext * context, const bool nullify): + NDArray(shapeInfo, ArrayOptions::dataType(shapeInfo), copyStrides, context) { } - else { - _buffer = std::make_shared(buffer, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, getContext()->getWorkspace()); + + //////////////////////////////////////////////////////////////////////// + NDArray::NDArray(std::shared_ptr buffer, const ShapeDescriptor& descriptor, sd::LaunchContext* context, const Nd4jLong offset) { + + _context = context; + _offset = offset; + + setShapeInfo(descriptor); + + _buffer = buffer; + + _isView = offset > 0 || _length * DataTypeUtils::sizeOf(_dataType) < buffer->getLenInBytes(); } -} -//////////////////////////////////////////////////////////////////////// -// do not allocate memory, memory for array is passed from outside -// we suppose the content of both (device and host) buffers is identical -NDArray::NDArray(void *buffer, void* 
bufferD, Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc, const bool isBuffDAlloc) { + NDArray::NDArray(void *buffer, Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc) : NDArray::NDArray(buffer, const_cast(shapeInfo), context, isBuffAlloc) { + // + } - if (shapeInfo == nullptr) - throw std::runtime_error("NDArray constructor cuda: can't be initalized without shapeinfo"); + //////////////////////////////////////////////////////////////////////// + // do not allocate memory, memory for array is passed from outside + NDArray::NDArray(void *buffer, const Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc) { - if ((int) shapeInfo[0] > MAX_RANK) - throw std::invalid_argument("NDArray constructor cuda: rank of NDArray can't exceed 32"); + if (buffer == nullptr && ArrayOptions::arrayType(shapeInfo) != ArrayType::EMPTY) + throw std::runtime_error("NDArray constructor: can't be initalized with nullptr buffer !"); - _context = context; - _offset = 0; + if (shapeInfo == nullptr) + throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo !"); - setShapeInfo(ShapeDescriptor(shapeInfo)); + if ((int) shapeInfo[0] > MAX_RANK) + throw std::invalid_argument("NDArray constructor: rank of NDArray can't exceed 32 !"); - if (!isEmpty()) - _buffer = std::make_shared(buffer, bufferD, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, isBuffDAlloc, getContext()->getWorkspace()); -} + _context = context; + _isAttached = getContext()->getWorkspace() != nullptr; + _offset = 0; + + setShapeInfo(ShapeDescriptor(shapeInfo)); + + if (this->isEmpty()) { + tickReadDevice(); + tickReadHost(); + } + else { + _buffer = std::make_shared(buffer, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, getContext()->getWorkspace()); + } + } + + //////////////////////////////////////////////////////////////////////// + // do not allocate memory, memory for array is passed from outside + // we suppose the content of 
both (device and host) buffers is identical + NDArray::NDArray(void *buffer, void* bufferD, const Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc, const bool isBuffDAlloc) { + + if (shapeInfo == nullptr) + throw std::runtime_error("NDArray constructor cuda: can't be initalized without shapeinfo"); + + if ((int) shapeInfo[0] > MAX_RANK) + throw std::invalid_argument("NDArray constructor cuda: rank of NDArray can't exceed 32"); + + _context = context; + _offset = 0; + + setShapeInfo(ShapeDescriptor(shapeInfo)); + + if (!isEmpty()) + _buffer = std::make_shared(buffer, bufferD, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, isBuffDAlloc, getContext()->getWorkspace()); + } ////////////////////////////////////////////////////////////////////////// NDArray::NDArray(std::shared_ptr buffer, const char order, const std::vector &shape, sd::LaunchContext* context) { @@ -1046,7 +1050,7 @@ std::vector NDArray::asByteVector() { auto dataLength = offsetsBuffer[numWords]; std::vector result(headerLength + dataLength); - memcpy(result.data(), getBuffer(), headerLength + dataLength); + memcpy(result.data(), buffer(), headerLength + dataLength); return result; } else { @@ -1056,10 +1060,10 @@ std::vector NDArray::asByteVector() { if (this->isView()) { auto tmp = this->dup(this->ordering()); syncToHost(); - memcpy(result.data(), tmp.getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + memcpy(result.data(), tmp.buffer(), (unsigned long long) lengthOf() * sizeOfT()); } else { syncToHost(); - memcpy(result.data(), getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + memcpy(result.data(), buffer(), (unsigned long long) lengthOf() * sizeOfT()); } return result; } @@ -1085,7 +1089,7 @@ void NDArray::streamline(char o) { syncToDevice(); std::shared_ptr newBuffer = std::make_shared(this->lengthOf() * sizeOfT(), dataType(), getContext()->getWorkspace()); auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(dataType(), order, 
rankOf(), shapeOf()); - NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), newBuffer->primary(), static_cast(shapeBuffer.primary()), newBuffer->special(), static_cast(shapeBuffer.special()), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), newBuffer->primary(), static_cast(shapeBuffer.primary()), newBuffer->special(), static_cast(shapeBuffer.special()), nullptr, nullptr, nullptr); setShapeInfo(static_cast(shapeBuffer.primary())); _buffer = newBuffer; _offset = 0; @@ -1140,7 +1144,7 @@ void NDArray::copyBuffersContinuouslyFrom(const NDArray& other, size_t sizeToCop if(offsetThis == 0) offsetThis = bufferOffset(); if(offsetOther == 0) - offsetOther = other.getBufferOffset(); + offsetOther = other.bufferOffset(); dataBuffer()->copyBufferFrom(*other.getDataBuffer(), sizeToCopyInBytes, offsetThis, offsetOther); } @@ -1154,10 +1158,7 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { if (other.isEmpty()) { if (!isEmpty()) { - ArrayOptions::setPropertyBit(shapeInfo(), ARRAY_EMPTY); - syncShape(); - _buffer = std::make_shared(); - _offset = 0; + throw std::runtime_error("Cannot assign empty array to non-empty array"); } return; } @@ -1171,7 +1172,7 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { if(lengthOf() == 1) { NDArray::preparePrimaryUse({this}, {&other}); - BUILD_DOUBLE_SELECTOR(dataType(), other.dataType(), templatedDoubleAssign, (buffer(), 0, other.getBuffer(), 0), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(dataType(), other.dataType(), templatedDoubleAssign, (buffer(), 0, other.buffer(), 0), LIBND4J_TYPES, LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {&other}); this->syncToDevice(); } @@ -1179,12 +1180,12 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { if (dataType() != 
other.dataType()) { auto tmp = other.cast(dataType()); NDArray::prepareSpecialUse({this}, {&tmp}); - NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), tmp.getBuffer(), tmp.getShapeInfo(), tmp.getSpecialBuffer(), tmp.getSpecialShapeInfo(), nullptr, allowParallelism); + NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {}); } else { NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr, allowParallelism); + NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {&other}); } } @@ -1198,7 +1199,7 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { } NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execTransformAny(getContext(), transform::Assign, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, nullptr, nullptr, allowParallelism); + NativeOpExecutioner::execTransformAny(getContext(), transform::Assign, other.buffer(), other.shapeInfo(), other.specialBuffer(), 
other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, nullptr, nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {&other}); } } @@ -1216,7 +1217,7 @@ void NDArray::assign(const T& value, bool allowParallelism) { auto temp = NDArrayFactory::create(dataType(), value, this->getContext()); NDArray::prepareSpecialUse({this}, {&temp}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::CopyPws, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), temp.buffer(), temp.shapeInfo(), temp.specialBuffer(), temp.getSpecialShapeInfo(), nullptr, allowParallelism); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), temp.buffer(), temp.shapeInfo(), temp.specialBuffer(), temp.specialShapeInfo(), nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {&temp}); } template ND4J_EXPORT void NDArray::assign(const double& value, bool allowParallelism); @@ -1254,7 +1255,7 @@ NDArray NDArray::varianceNumber(sd::variance::Ops op, bool biasCorrected) { NDArray res(DataTypeUtils::pickFloatingType(dataType()), getContext()); NDArray::prepareSpecialUse({&res}, {this}); - NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo(), biasCorrected); + NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo(), biasCorrected); NDArray::registerSpecialUse({&res}, {this}); return res; @@ -1268,7 +1269,7 @@ NDArray NDArray::sumNumber() const { NDArray res(dataType(), getContext()); NDArray::prepareSpecialUse({&res}, 
{this}); - NativeOpExecutioner::execReduceSameScalar(getContext(), sd::reduce::SameOps::Sum, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), sd::reduce::SameOps::Sum, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); NDArray::registerSpecialUse({&res}, {this}); return res; @@ -1283,7 +1284,7 @@ NDArray NDArray::meanNumber() const { NDArray res(DataTypeUtils::pickFloatingType(dataType()), getContext()); NDArray::prepareSpecialUse({&res}, {this}); - NativeOpExecutioner::execReduceFloatScalar(getContext(), sd::reduce::FloatOps::Mean, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); + NativeOpExecutioner::execReduceFloatScalar(getContext(), sd::reduce::FloatOps::Mean, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); NDArray::registerSpecialUse({&res}, {this}); return res; } @@ -1315,7 +1316,7 @@ void NDArray::templatedSet(void *buffer, const Nd4jLong *indices, const void *va auto t = reinterpret_cast(buffer); const auto y = *(reinterpret_cast(value)); - auto xOffset = shape::getOffset(getShapeInfo(), indices); + auto xOffset = shape::getOffset(shapeInfo(), indices); t[xOffset] = static_cast(y); } BUILD_DOUBLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedSet, (void *buffer, const Nd4jLong *indices, const void *value), LIBND4J_TYPES, LIBND4J_TYPES); @@ -1339,9 +1340,13 @@ void NDArray::setContext(sd::LaunchContext *context) { } ////////////////////////////////////////////////////////////////////////// -void* NDArray::bufferWithOffset(Nd4jLong offset) const { +void const* 
NDArray::bufferWithOffset(Nd4jLong offset) const { + return const_cast(buffer() != nullptr ? static_cast(buffer()) + (offset * sizeOfT()) : nullptr); +} - return getBuffer() != nullptr ? static_cast(getBuffer()) + (offset * sizeOfT()) : nullptr; +////////////////////////////////////////////////////////////////////////// +void* NDArray::bufferWithOffset(Nd4jLong offset) { + return const_cast(buffer() != nullptr ? static_cast(buffer()) + (offset * sizeOfT()) : nullptr); } ////////////////////////////////////////////////////////////////////////// @@ -1431,7 +1436,7 @@ NDArray NDArray::reduceNumber(sd::reduce::FloatOps op, void *extraParams) const NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceFloatScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1445,7 +1450,7 @@ NDArray NDArray::reduceNumber(sd::reduce::SameOps op, void *extraParams) const { NDArray result(dataType(), getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceSameScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1460,7 
+1465,7 @@ NDArray NDArray::reduceNumber(sd::reduce::BoolOps op, void *extraParams) const { NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceBoolScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1475,7 +1480,7 @@ NDArray NDArray::reduceNumber(sd::reduce::LongOps op, void *extraParams) const { NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceLongScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1489,7 +1494,7 @@ void NDArray::reduceNumber(sd::reduce::FloatOps op, NDArray& target, void *extra throw std::invalid_argument("NDArray::reduceNumber FloatOps: target array should be scalar and have corresponding float type!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceFloatScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + 
NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1502,7 +1507,7 @@ void NDArray::reduceNumber(sd::reduce::SameOps op, NDArray& target, void *extraP throw std::invalid_argument("NDArray::reduceNumber SameOps: target array should be scalar and have same type as this array!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceSameScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.getBuffer(), target.getShapeInfo(), target.specialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1515,7 +1520,7 @@ void NDArray::reduceNumber(sd::reduce::BoolOps op, NDArray& target, void *extraP throw std::invalid_argument("NDArray::reduceNumber BoolOps: target array should be scalar and have bool type!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceBoolScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.getBuffer(), target.getShapeInfo(), target.specialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1528,7 +1533,7 @@ void NDArray::reduceNumber(sd::reduce::LongOps op, NDArray& target, void *extraP throw 
std::invalid_argument("NDArray::reduceNumber LongOps: target array should be scalar and have long type!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceLongScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.getBuffer(), target.getShapeInfo(), target.specialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1540,7 +1545,7 @@ NDArray NDArray::indexReduceNumber(sd::indexreduce::Ops op, ExtraArguments *extr auto res = NDArrayFactory::create(0); NDArray::NDArray::prepareSpecialUse({&res}, {this}); - NativeOpExecutioner::execIndexReduceScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams == nullptr ? nullptr : extraParams->argumentsAsT(this->dataType()), res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); + NativeOpExecutioner::execIndexReduceScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams == nullptr ? 
nullptr : extraParams->argumentsAsT(this->dataType()), res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); NDArray::NDArray::registerSpecialUse({&res}, {this}); return res; @@ -1734,7 +1739,7 @@ static void printFormatted(NDArray const* arr, int depth, int limit) { //std::unique_ptr arrs(arr->allTensorsAlongDimension({0})); size_t restCount = 2; printf("["); - restCount = ShapeUtils::getNumOfSubArrs(arr->getShapeInfo(), {0}); + restCount = ShapeUtils::getNumOfSubArrs(arr->shapeInfo(), {0}); for (size_t arrIndex = 0; arrIndex < restCount; ++arrIndex) { NDArray subArr = (*arr)(arrIndex, {0}); printFormatted(&subArr, depth + 1, limit); @@ -1792,14 +1797,14 @@ void NDArray::printIndexedBuffer(const char* msg, Nd4jLong limit) const { ////////////////////////////////////////////////////////////////////////// template void* NDArray::templatedPointerShift(const Nd4jLong offset) const { - return reinterpret_cast(getBuffer()) + offset; + return const_cast(reinterpret_cast(buffer()) + offset); } BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void* NDArray::templatedPointerShift, (const Nd4jLong offset) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // method makes copy of this array and applies to the copy transpose operation, this array remains unaffected NDArray NDArray::transpose() const &{ - NDArray newArr(getDataBuffer(), ShapeDescriptor(getShapeInfo()), getContext(), getBufferOffset()); + NDArray newArr(getDataBuffer(), ShapeDescriptor(shapeInfo()), getContext(), bufferOffset()); newArr.transposei(); return newArr; @@ -1818,7 +1823,7 @@ NDArray NDArray::transpose() && { void NDArray::transpose(NDArray& target) const { auto correctShape = ShapeUtils::evalTranspShapeInfo(*this, getContext()->getWorkspace()); - if(!shape::equalsStrict(correctShape, target.getShapeInfo())) + if(!shape::equalsStrict(correctShape, target.shapeInfo())) throw std::runtime_error("NDArray::transpose method: the shapeInfo of 
target array is wrong !"); target._buffer = _buffer; @@ -1920,7 +1925,7 @@ Nd4jLong NDArray::argMax(std::initializer_list dimensions) { // create new array with corresponding order and shape, new array will point to the same _buffer as this array NDArray NDArray::reshape(const char order, const std::vector& shape, const bool copyToNewBuff) const & { - NDArray newArr(getDataBuffer(), ShapeDescriptor(getShapeInfo()), getContext(), getBufferOffset()); + NDArray newArr(getDataBuffer(), ShapeDescriptor(shapeInfo()), getContext(), bufferOffset()); newArr.reshapei(order, shape, copyToNewBuff); return newArr; @@ -2001,7 +2006,7 @@ NDArray NDArray::permute(const int* dimensions, const int rank) const & { // evaluate shapeInfo for output (permuted) array ret auto shapeInfoPermuted = ShapeUtils::evalPermShapeInfo(dimensions, rank, *this, getContext()->getWorkspace()); - NDArray ret(getDataBuffer(), ShapeDescriptor(shapeInfoPermuted), getContext(), getBufferOffset()); + NDArray ret(getDataBuffer(), ShapeDescriptor(shapeInfoPermuted), getContext(), bufferOffset()); ret._isView = true; return ret; } @@ -2157,19 +2162,26 @@ bool NDArray::isUnitary() { ////////////////////////////////////////////////////////////////////////// template <> -std::string* ND4J_EXPORT NDArray::bufferAsT() const { +const std::string* ND4J_EXPORT NDArray::bufferAsT() const { throw std::runtime_error("This method is NOT supposed to be used"); } ////////////////////////////////////////////////////////////////////////// template -T* NDArray::bufferAsT() const { +const T* NDArray::bufferAsT() const { // FIXME: do we REALLY want sync here? 
syncToHost(); - return reinterpret_cast(getBuffer()); + return reinterpret_cast(buffer()); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , * NDArray::bufferAsT() const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT const, * NDArray::bufferAsT() const, LIBND4J_TYPES); + +template +T* NDArray::bufferAsT() { + syncToHost(); + return reinterpret_cast(buffer()); +} +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT, * NDArray::bufferAsT(), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// NDArray NDArray::subarray(IndicesList& idx) const { @@ -2282,7 +2294,7 @@ NDArray NDArray::asT() const{ auto result = isScalar() ? NDArray('c', {}, std::vector{0.}, DataTypeUtils::fromT(), this->getContext()) : NDArray(ordering(), getShapeAsVector(), DataTypeUtils::fromT(), this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformAny(getContext(), transform::AnyOps::Assign, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.getBuffer(), result.getShapeInfo(), result.getSpecialBuffer(), result.getSpecialShapeInfo(), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformAny(getContext(), transform::AnyOps::Assign, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), nullptr, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -2449,20 +2461,20 @@ void NDArray::operator+=(const NDArray& other) { if (this->lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + 
NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Add, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator+=: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(shape::equalsTypesAndShapesSoft(getShapeInfo(), bShape)) { + if(shape::equalsTypesAndShapesSoft(shapeInfo(), bShape)) { this->applyTrueBroadcast(sd::BroadcastOpsTuple::Add(), other, *this, false); } else { @@ -2483,20 +2495,20 @@ void NDArray::operator-=(const NDArray& other) { if (lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), 
getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Subtract, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator-=: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(shape::equalsTypesAndShapesSoft(getShapeInfo(), bShape)) { + if(shape::equalsTypesAndShapesSoft(shapeInfo(), bShape)) { this->applyTrueBroadcast(sd::BroadcastOpsTuple::Subtract(), other, *this, false); } else { @@ -2516,16 +2528,16 @@ void NDArray::operator*=(const NDArray& other) { if (lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - 
NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Multiply, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator*=: the shapes of this and other arrays are not suitable for broadcast operation !"); @@ -2553,16 +2565,16 @@ void NDArray::operator/=(const NDArray& other) { if (lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, 
buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Divide, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator/=: the shapes of this and other arrays are not suitable for broadcast operation !"); @@ -2587,7 +2599,7 @@ void NDArray::operator+=(const T value) { NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), 
other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } @@ -2609,7 +2621,7 @@ void NDArray::operator-=(const T value) { NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } @@ -2629,7 +2641,7 @@ void NDArray::operator*=(const T scalar) { auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } @@ 
-2652,7 +2664,7 @@ void NDArray::operator/=(const T scalar) { auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } template ND4J_EXPORT void NDArray::operator/=(const double scalar); @@ -2672,10 +2684,10 @@ NDArray NDArray::operator-() const & { if (isS()) throw std::runtime_error("NDArray::negative-: you can't use this method on String array!"); - NDArray result(getShapeInfo(), false, getContext()); + NDArray result(shapeInfo(), false, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), nullptr, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -2687,7 +2699,7 @@ NDArray NDArray::operator-() && { throw std::runtime_error("NDArray::negative-: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - 
NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -2757,7 +2769,7 @@ double NDArray::getTrace() const { for(int j = 0; j < rank; ++j) indices[j] = 1; - auto offset = shape::getOffset(getShapeInfo(), indices); + auto offset = shape::getOffset(shapeInfo(), indices); for(int i = 0; i < rank; ++i) if(minDim > shape[i]) @@ -2779,7 +2791,7 @@ NDArray NDArray::quantize(const NDArray& array) { auto ws = array.getContext()->getWorkspace(); - Nd4jLong* shapeInfo = ShapeBuilders::copyShapeInfo(array.getShapeInfo(), true, ws); + Nd4jLong* shapeInfo = ShapeBuilders::copyShapeInfo(array.shapeInfo(), true, ws); ArrayOptions::setPropertyBit(shapeInfo, ARRAY_QUANTIZED); std::shared_ptr buffer = std::make_shared(TypeCast::estimateQuantizedSize(array.lengthOf()), ArrayOptions::dataType(shapeInfo), ws); @@ -2812,31 +2824,31 @@ void NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& other, // } if(checkTargetShape) { - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of target array must be equal to max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(!shape::equalsTypesAndShapesSoft(target.getShapeInfo(), newShapeInfo)) + if(!shape::equalsTypesAndShapesSoft(target.shapeInfo(), newShapeInfo)) throw 
std::runtime_error("NDArray::applyTrueBroadcast method: the shape or type of target array is wrong !"); } - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcast(getContext(), op.b, getBuffer(), xShapeInfoH, getSpecialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + 
NativeOpExecutioner::execBroadcast(getContext(), op.b, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -2861,7 +2873,7 @@ void NDArray::applyTrueBroadcast(sd::BroadcastBoolOpsTuple op, const NDArray& ot // } if(checkTargetShape) { - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of target array must be equal to max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); if(!shape::equalsSoft(target._shapeInfo, newShapeInfo) || target.dataType() != DataType::BOOL) @@ -2870,24 +2882,24 @@ void NDArray::applyTrueBroadcast(sd::BroadcastBoolOpsTuple op, const NDArray& ot throw std::invalid_argument("NDArray::applyTrueBroadcast bool method: this and other arrays must have the same type !"); } - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + 
xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastBool(getContext(), op.b, getBuffer(), xShapeInfoH, getSpecialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + NativeOpExecutioner::execBroadcastBool(getContext(), op.b, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); registerSpecialUse({&target}, {this, &other}); } @@ -2912,7 +2924,7 @@ void NDArray::applyTrueBroadcast(sd::BroadcastIntOpsTuple op, const NDArray& oth // } if(checkTargetShape) { - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, false, newShapeInfo, getContext()->getWorkspace())) // the rank of target array must be equal to max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); if(!shape::equalsSoft(target._shapeInfo, newShapeInfo) || target.dataType() != this->dataType()) @@ -2921,24 +2933,24 @@ void 
NDArray::applyTrueBroadcast(sd::BroadcastIntOpsTuple op, const NDArray& oth throw std::invalid_argument("NDArray::applyTrueBroadcast int method: this and other arrays must have the same type !"); } - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastInt(getContext(), op.b, getBuffer(), xShapeInfoH, getSpecialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), 
target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execBroadcastInt(getContext(), op.b, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -2951,7 +2963,7 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& oth return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); NDArray result(newShapeInfo, true, getContext()); @@ -2970,11 +2982,11 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, NDArray&& other, E return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(!shape::shapeEquals(newShapeInfo, other.getShapeInfo())) { + if(!shape::shapeEquals(newShapeInfo, other.shapeInfo())) { NDArray result(newShapeInfo, true, getContext()); this->applyTrueBroadcast(op, other, result, false, extraArgs); @@ -2994,11 +3006,11 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& oth return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of 
new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(!shape::shapeEquals(newShapeInfo, getShapeInfo())) { + if(!shape::shapeEquals(newShapeInfo, shapeInfo())) { NDArray result(newShapeInfo, true, getContext()); this->applyTrueBroadcast(op, other, result, false, extraArgs); @@ -3018,12 +3030,12 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, NDArray&& other, E return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - const bool thisMove = shape::shapeEquals(newShapeInfo, getShapeInfo()); - const bool otherMove = shape::shapeEquals(newShapeInfo, other.getShapeInfo()); + const bool thisMove = shape::shapeEquals(newShapeInfo, shapeInfo()); + const bool otherMove = shape::shapeEquals(newShapeInfo, other.shapeInfo()); if(!thisMove && !otherMove) { @@ -3060,12 +3072,12 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), 
other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } - if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), other.getShapeInfo())) + if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), other.shapeInfo())) throw std::invalid_argument("NDArray::applyBroadcast method: wrong type of target array !"); if(!target.isSameShape(this) && !target.isSameShape(other)) throw std::invalid_argument("NDArray::applyBroadcast method: one of of two input arrays (this or other) should has the same shape as target array!"); @@ -3075,24 +3087,24 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime if (dimensions.size() > 1) std::sort(copy.begin(), copy.end()); - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace(), copy); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), 
other.getShapeInfo(), other.getContext()->getWorkspace(), copy); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcast(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execBroadcast(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -3112,7 +3124,7 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), 
nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3129,24 +3141,24 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& if (dimensions.size() > 1) std::sort(copy.begin(), copy.end()); - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace(), copy); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace(), copy); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastBool(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, 
other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + NativeOpExecutioner::execBroadcastBool(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); registerSpecialUse({&target}, {this, &other}); } @@ -3167,7 +3179,7 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3184,24 +3196,24 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d if (dimensions.size() > 1) std::sort(copy.begin(), copy.end()); - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = 
other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace(), copy); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace(), copy); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastInt(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execBroadcastInt(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -3364,7 +3376,7 @@ void NDArray::applyPairwiseTransform(sd::pairwise::Ops op, const NDArray& other, throw std::invalid_argument("NDArray::applyPairwiseTransform method - type of 
target array must be the same as type of this or other array !"); NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); NDArray::registerSpecialUse({&target}, {this, &other}); if (extraParams != nullptr) @@ -3383,7 +3395,7 @@ void NDArray::applyPairwiseTransform(sd::pairwise::BoolOps op, const NDArray& ot throw std::invalid_argument("NDArray::applyPairwiseTransform BoolOps method - this and other arrays must have the same type !"); NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execPairwiseBoolTransform(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); + NativeOpExecutioner::execPairwiseBoolTransform(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()) : nullptr); NDArray::registerSpecialUse({&target}, {this, &other}); } @@ -3399,7 +3411,7 @@ void NDArray::applyPairwiseTransform(sd::pairwise::IntOps op, const NDArray& oth throw std::invalid_argument("NDArray::applyPairwiseTransform IntOps method - this and other arrays must have the same type !"); NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execPairwiseIntTransform(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); + NativeOpExecutioner::execPairwiseIntTransform(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()) : nullptr); NDArray::registerSpecialUse({&target}, {this, &other}); } @@ -3429,12 +3441,12 @@ void NDArray::varianceAlongDimension(sd::variance::Ops op, NDArray& target, cons NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == dimensions.size() || dimensions.empty()) - NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), biasCorrected); + NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), biasCorrected); else { std::vector copy(dimensions); auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.getSpecialBuffer(), target.specialShapeInfo(), pDims, dimensions.size(), packX.platformShapeInfo(), packX.platformOffsets(), biasCorrected); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, dimensions.size(), packX.platformShapeInfo(), packX.platformOffsets(), biasCorrected); synchronize("NDArray::varianceAlongDimension"); } @@ -3534,7 +3546,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const { // we need to be able to compare [1, len] to [len] if ((rankOf() == 1 && 
other->rankOf() == 2) || (rankOf() == 2 && other->rankOf() == 1)) { // FIXME: do something here? - } else if (!shape::equalsSoft(getShapeInfo(), other->getShapeInfo())) + } else if (!shape::equalsSoft(shapeInfo(), other->shapeInfo())) return false; if (isS()) { @@ -3576,11 +3588,11 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const { ExtraArguments extras({0.0, 0.0, eps}); NDArray::prepareSpecialUse({&tmp}, {this, other}); - NativeOpExecutioner::execReduce3Scalar(getContext(), reduce3::EqualsWithEps, getBuffer(), getShapeInfo(), - getSpecialBuffer(), getSpecialShapeInfo(), - extras.argumentsAsT(DataType::FLOAT32), other->getBuffer(), - other->getShapeInfo(), other->getSpecialBuffer(), - other->getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), + NativeOpExecutioner::execReduce3Scalar(getContext(), reduce3::EqualsWithEps, buffer(), shapeInfo(), + specialBuffer(), specialShapeInfo(), + extras.argumentsAsT(DataType::FLOAT32), other->buffer(), + other->shapeInfo(), other->specialBuffer(), + other->specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo()); NDArray::registerSpecialUse({&tmp}, {this, other}); @@ -3722,7 +3734,7 @@ utf8string NDArray::e(const Nd4jLong i) const { syncToHost(); tickReadHost(); - return *(reinterpret_cast(getBuffer())[rp]); + return *(reinterpret_cast(buffer())[rp]); } ///////////////////////////////////////////////////////////////////////// @@ -3733,7 +3745,7 @@ T NDArray::e(const Nd4jLong i) const { NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), rp), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), rp), LIBND4J_TYPES); } BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , NDArray::e(const Nd4jLong) const, LIBND4J_TYPES); @@ -3747,12 +3759,12 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j) const { throw 
std::invalid_argument("NDArray::e(i,j): one of input indexes is out of array length or rank!=2 !"); const Nd4jLong coords[2] = {i, j}; - const auto xOffset = shape::getOffset(getShapeInfo(), coords); + const auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), xOffset), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), xOffset), LIBND4J_TYPES); return static_cast(119); } @@ -3767,12 +3779,12 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { throw std::invalid_argument("NDArray::e(i,j,k): one of input indexes is out of array length or rank!=3 !"); const Nd4jLong coords[3] = {i, j, k}; - const auto xOffset = shape::getOffset(getShapeInfo(), coords); + const auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), xOffset), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), xOffset), LIBND4J_TYPES); return static_cast(119); } @@ -3787,12 +3799,12 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLon throw std::invalid_argument("NDArray::e(i,j,k,l): one of input indexes is out of array length or rank!=4 !"); const Nd4jLong coords[4] = {i, j, k, l}; - const auto xOffset = shape::getOffset(getShapeInfo(), coords); + const auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), xOffset), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), xOffset), LIBND4J_TYPES); return static_cast(119); } @@ -3805,7 +3817,7 
@@ NDArray NDArray::e(const Nd4jLong i) const { NDArray scalar(dataType(), getContext()); - scalar.copyBuffersContinuouslyFrom(*this, sizeOfT(), 0, getBufferOffset() + offset); + scalar.copyBuffersContinuouslyFrom(*this, sizeOfT(), 0, bufferOffset() + offset); return scalar; } @@ -3884,7 +3896,7 @@ NDArray NDArray::transform(sd::transform::FloatOps op, void *extraParams) const NDArray result(ordering(), getShapeAsVector(), DataTypeUtils::pickFloatingType(dataType()), getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformFloat(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3896,7 +3908,7 @@ NDArray NDArray::transform(sd::transform::FloatOps op, void *extraParams) && { throw std::runtime_error("NDArray::transform SameOps: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformFloat(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3907,10 +3919,10 @@ NDArray NDArray::transform(sd::transform::SameOps op, void *extraParams) const & if (isS()) throw 
std::runtime_error("NDArray::transform SameOps: you can't use this method on String array!"); - NDArray result(getShapeInfo(), false, getContext()); + NDArray result(shapeInfo(), false, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformSame(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3922,7 +3934,7 @@ NDArray NDArray::transform(sd::transform::SameOps op, void *extraParams) && { throw std::runtime_error("NDArray::transform SameOps: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformSame(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3933,10 +3945,10 @@ NDArray NDArray::transform(sd::transform::StrictOps op, void *extraParams) const if (!this->isR()) throw std::runtime_error("Source array must have one of FLOAT types"); - NDArray result(getShapeInfo(), false, getContext()); + NDArray result(shapeInfo(), false, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformStrict(getContext(), op, getBuffer(), getShapeInfo(), 
getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformStrict(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3948,7 +3960,7 @@ NDArray NDArray::transform(sd::transform::StrictOps op, void *extraParams) && { throw std::runtime_error("Source array must have one of FLOAT types"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformStrict(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformStrict(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3962,7 +3974,7 @@ NDArray NDArray::transform(sd::transform::BoolOps op, void *extraParams) const & NDArray result(ordering(), getShapeAsVector(), sd::DataType::BOOL, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3974,7 +3986,7 @@ 
NDArray NDArray::transform(sd::transform::BoolOps op, void *extraParams) && { throw std::runtime_error("NDArray::transform BoolOps: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3987,11 +3999,11 @@ void NDArray::applyScalarArr(sd::scalar::Ops op, const NDArray& scalar, NDArray& if (scalar.lengthOf() != 1) throw std::invalid_argument("NDArray::applyScalarArr method: operand is not a scalar!"); - if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), scalar.getShapeInfo()) && !(target.dataType() == dataType() || target.dataType() == scalar.dataType())) + if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), scalar.shapeInfo()) && !(target.dataType() == dataType() || target.dataType() == scalar.dataType())) throw std::invalid_argument("NDArray::applyScalarArr method: wrong type of target array!"); NDArray::prepareSpecialUse({&target}, {this, &scalar}); - NativeOpExecutioner::execScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.getBuffer(), scalar.getShapeInfo(), scalar.getSpecialBuffer(), scalar.getSpecialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()): nullptr); + NativeOpExecutioner::execScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.buffer(), scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); NDArray::registerSpecialUse({&target}, {this, &scalar}); } @@ -4007,7 +4019,7 @@ void NDArray::applyScalarArr(sd::scalar::BoolOps op, const NDArray& scalar, NDAr } NDArray::prepareSpecialUse({&target}, {this, &scalar}); - NativeOpExecutioner::execScalarBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.getBuffer(), scalar.getShapeInfo(), scalar.getSpecialBuffer(), scalar.getSpecialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); + NativeOpExecutioner::execScalarBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.buffer(), scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); NDArray::registerSpecialUse({&target}, {this, &scalar}); } @@ -4024,7 +4036,7 @@ void NDArray::applyScalarArr(sd::scalar::IntOps op, const NDArray& scalar, NDArr } NDArray::prepareSpecialUse({&target}, {this, &scalar}); - NativeOpExecutioner::execScalarInt(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.getBuffer(), scalar.getShapeInfo(), scalar.getSpecialBuffer(), scalar.getSpecialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()): nullptr); + NativeOpExecutioner::execScalarInt(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.buffer(), scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); NDArray::registerSpecialUse({&target}, {this, &scalar}); } @@ -4100,14 +4112,14 @@ void NDArray::applyIndexReduce(sd::indexreduce::Ops op, NDArray& target, const s NDArray::prepareSpecialUse({&target}, {this}); if (target.lengthOf() == 1) { - NativeOpExecutioner::execIndexReduceScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execIndexReduceScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { std::vector copy = dimensions; shape::checkDimensions(rankOf(), copy); auto pDims = sd::Environment::getInstance()->isCPU() ? 
copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - NativeOpExecutioner::execIndexReduce(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + NativeOpExecutioner::execIndexReduce(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); synchronize("NDArray::applyIndexReduce"); } @@ -4147,7 +4159,7 @@ NDArray NDArray::applyReduce3(sd::reduce3::Ops op, const NDArray& other, const E void* params = extraParams != nullptr ? const_cast(extraParams)->argumentsAsT(dataType()) : nullptr; NDArray::prepareSpecialUse({&result}, {this, &other}); - NativeOpExecutioner::execReduce3Scalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduce3Scalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this, &other}); return result; @@ -4175,19 +4187,19 @@ NDArray NDArray::applyReduce3(sd::reduce3::Ops op, const NDArray& other, const s // perform calculations if(rankOf() == copy.size() && other.rankOf() == copy.size()) { - 
NativeOpExecutioner::execReduce3Scalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduce3Scalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); } else { auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(other.getShapeInfo(), copy); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo()) || (packX.numberOfTads() != packY.numberOfTads() && packX.numberOfTads() != 1 && packY.numberOfTads() != 1)) throw std::runtime_error("NDArray::applyReduce3 cuda method: arrays tads are inconsistent !"); - NativeOpExecutioner::execReduce3(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); + NativeOpExecutioner::execReduce3(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), 
other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); } registerSpecialUse({&result}, {this, &other}); @@ -4208,8 +4220,8 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons shape::checkDimensions(rankOf(), copy); shape::checkDimensions(other.rankOf(), copy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(other.getShapeInfo(), copy); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); // check tads shapes if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo())) @@ -4227,7 +4239,7 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons auto pDims = sd::Environment::getInstance()->isCPU() ? 
copy.data() : nullptr; NDArray::prepareSpecialUse({&result}, {this, &other}); - NativeOpExecutioner::execReduce3All(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); + NativeOpExecutioner::execReduce3All(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); NDArray::registerSpecialUse({&result}, {this, &other}); return result; @@ -4246,18 +4258,18 @@ void NDArray::reduceAlongDimension(sd::reduce::FloatOps op, NDArray& target, con if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension FloatOps: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceFloatScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(),nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(),nullptr, target.buffer(), target.shapeInfo(), 
target.specialBuffer(), target.specialShapeInfo()); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - NativeOpExecutioner::execReduceFloat(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), copy.data(), copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + NativeOpExecutioner::execReduceFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), copy.data(), copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension FloatOps"); @@ -4277,19 +4289,19 @@ void NDArray::reduceAlongDimension(sd::reduce::SameOps op, NDArray& target, cons if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension SameOps: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceSameScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { //if (!isEmpty()) { auto pDims = 
sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), copy); - NativeOpExecutioner::execReduceSame(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + NativeOpExecutioner::execReduceSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension SameOps"); @@ -4309,19 +4321,19 @@ void NDArray::reduceAlongDimension(sd::reduce::LongOps op, NDArray& target, cons if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension LongOps: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceLongScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { auto pDims = 
sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), copy); - NativeOpExecutioner::execReduceLong(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + NativeOpExecutioner::execReduceLong(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4341,19 +4353,19 @@ void NDArray::reduceAlongDimension(sd::reduce::BoolOps op, NDArray& target, cons if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension BoolOps cuda: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceBoolScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { auto pDims = 
sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), copy); - NativeOpExecutioner::execReduceBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + NativeOpExecutioner::execReduceBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4372,7 +4384,7 @@ void NDArray::p(const Nd4jLong i, const T value) { const void *pV = reinterpret_cast(const_cast(&value)); NDArray::preparePrimaryUse({this}, {}, true); - BUILD_SINGLE_PARTIAL_SELECTOR(this->dataType(), templatedSet<, T>(this->getBuffer(), rp, pV), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(this->dataType(), templatedSet<, T>(this->buffer(), rp, pV), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } @@ -4400,10 +4412,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const T value) { void *p = reinterpret_cast(const_cast(&value)); Nd4jLong coords[2] = {i, j}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); + auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({this}, {}, true); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } template ND4J_EXPORT void NDArray::p(const Nd4jLong i, 
const Nd4jLong j, const double value); @@ -4432,8 +4444,8 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const T va void *p = reinterpret_cast(const_cast(&value)); Nd4jLong coords[3] = {i, j, k}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + auto xOffset = shape::getOffset(shapeInfo(), coords); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const double value); @@ -4459,10 +4471,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4j void *p = reinterpret_cast(const_cast(&value)); Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); + auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({this}, {}, true); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const double value); @@ -4489,7 +4501,7 @@ void NDArray::p(const Nd4jLong i, const NDArray& scalar) { NDArray::preparePrimaryUse({this}, {&scalar}, true); auto rp = getOffset(i); - BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (getBuffer(), rp, scalar.dataType(), scalar.getBuffer()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (buffer(), rp, scalar.dataType(), scalar.buffer()), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {&scalar}); } @@ -4501,13 +4513,13 @@ void NDArray::p(const Nd4jLong i, const NDArray& scalar) { if 
(i >= _length) throw std::invalid_argument("NDArray::p(i, NDArray_scalar): input index is out of array length !"); -// void *p = reinterpret_cast(scalar.getBuffer()); +// void *p = reinterpret_cast(scalar.buffer()); Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); + auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({this}, {&scalar}, true); -// BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); - BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (this->getBuffer(), xOffset, scalar.dataType(), scalar.getBuffer()), LIBND4J_TYPES); +// BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (this->buffer(), xOffset, scalar.dataType(), scalar.buffer()), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {&scalar}); } @@ -4523,10 +4535,10 @@ void NDArray::addRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), 
target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4542,10 +4554,10 @@ void NDArray::subRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Subtract, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), &dimension, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), &dimension, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4562,10 +4574,10 @@ void NDArray::mulRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), 
row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4583,10 +4595,10 @@ void NDArray::divRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Divide, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4601,10 +4613,10 @@ void NDArray::addiRowVector(const NDArray& row) { 
int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({this}, {&row}); } @@ -4619,10 +4631,10 @@ void NDArray::addColumnVector(const NDArray &column, NDArray &target) const { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &column}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), column.getBuffer(), column.getShapeInfo(), column.getSpecialBuffer(), column.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), 
shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &column}); } @@ -4636,10 +4648,10 @@ void NDArray::addiColumnVector(const NDArray &column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), column.getBuffer(), column.getShapeInfo(), column.getSpecialBuffer(), column.getSpecialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({this}, {&column}); } @@ -4653,10 +4665,10 @@ void NDArray::muliColumnVector(const NDArray& column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); - NativeOpExecutioner::execBroadcast(getContext(), 
sd::broadcast::Ops::Multiply, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), column.getBuffer(), column.getShapeInfo(), column.getSpecialBuffer(), column.getSpecialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({this}, {&column}); } @@ -4694,7 +4706,7 @@ ResultSet NDArray::multipleTensorsAlongDimension(const std::vector &indices if (indices.size() == 0) return result; - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), const_cast(dimensions.data()), dimensions.size()); + auto pack = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), const_cast(dimensions.data()), dimensions.size()); auto tadLength = shape::length(pack.primaryShapeInfo()); auto numTads = lengthOf() / tadLength; @@ -4705,7 +4717,7 @@ ResultSet NDArray::multipleTensorsAlongDimension(const std::vector &indices throw std::runtime_error("Bad index"); } - auto array = new NDArray(getDataBuffer(), ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset()); + auto array = new NDArray(getDataBuffer(), ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + bufferOffset()); result.push_back(array); } @@ -4777,7 +4789,7 @@ NDArray NDArray::diagonal(const char type) const { indices[i] = 1; } - auto step = shape::getOffset(getShapeInfo(), indices); + auto step = shape::getOffset(shapeInfo(), indices); if(type == 'c') { 
outShapeInfo[1] = diagSize; @@ -4796,7 +4808,7 @@ NDArray NDArray::diagonal(const char type) const { ArrayOptions::setDataType(outShapeInfo, this->dataType()); - NDArray result(_buffer, ShapeDescriptor(outShapeInfo), getContext(), getBufferOffset()); + NDArray result(_buffer, ShapeDescriptor(outShapeInfo), getContext(), bufferOffset()); RELEASE(outShapeInfo, getContext()->getWorkspace()); @@ -4819,7 +4831,7 @@ ResultSet NDArray::allTensorsAlongDimension(const std::vector &dimensions) auto numTads = pack.numberOfTads(); for (Nd4jLong idx = 0; idx < numTads; idx++ ) { - auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset()); + auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + bufferOffset()); array->_isView = true; result.push_back(array); } @@ -4862,9 +4874,9 @@ NDArray NDArray::operator()(const std::vector& idx, const bool keepUni Nd4jLong offset; - shape::calcSubArrShapeInfoAndOffset(idx.data(), getShapeInfo(), subArrShapeInfo, offset, keepUnitiesInShape, isStrided, numOfUntiesInSubArrShape); + shape::calcSubArrShapeInfoAndOffset(idx.data(), shapeInfo(), subArrShapeInfo, offset, keepUnitiesInShape, isStrided, numOfUntiesInSubArrShape); - NDArray result(_buffer, ShapeDescriptor(subArrShapeInfo), getContext(), offset + getBufferOffset()); + NDArray result(_buffer, ShapeDescriptor(subArrShapeInfo), getContext(), offset + bufferOffset()); result._isView = true; RELEASE(subArrShapeInfo, getContext()->getWorkspace()); @@ -5025,7 +5037,7 @@ NDArray operator+(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), 
arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5044,10 +5056,10 @@ NDArray operator+(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator+(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5095,7 +5107,7 @@ NDArray operator-(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, 
arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5111,10 +5123,10 @@ NDArray operator-(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator-(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), 
tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5138,7 +5150,7 @@ NDArray operator-(const T& scalar, NDArray&& arr) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.getBuffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5158,10 +5170,10 @@ NDArray operator-(const T& scalar, const NDArray& arr) { throw std::runtime_error("operator-(const T& scalar, const NDArray& arr): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.getBuffer(), result.getShapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), 
tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5186,7 +5198,7 @@ NDArray operator*(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5206,10 +5218,10 @@ NDArray operator*(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator*(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - 
NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5262,7 +5274,7 @@ NDArray operator/(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5281,10 +5293,10 @@ NDArray operator/(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator/(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), 
DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5309,7 +5321,7 @@ NDArray operator/(const T& scalar, NDArray&& arr) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.getBuffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5330,10 +5342,10 @@ NDArray 
operator/(const T& scalar, const NDArray& arr) { throw std::runtime_error("operator/(const T& scalar, const NDArray& arr): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.getBuffer(), result.getShapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5365,10 +5377,10 @@ NDArray operator+(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Add, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), 
arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Add, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { @@ -5415,10 +5427,10 @@ NDArray operator-(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Subtract, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Subtract, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { @@ -5465,10 +5477,10 @@ NDArray operator*(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) 
result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Multiply, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Multiply, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { @@ -5515,10 +5527,10 @@ NDArray operator/(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Divide, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), 
result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Divide, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { diff --git a/libnd4j/include/array/NDArrayLambda.hXX b/libnd4j/include/array/NDArrayLambda.hXX index 50d9bc8d6..f213b6aa6 100644 --- a/libnd4j/include/array/NDArrayLambda.hXX +++ b/libnd4j/include/array/NDArrayLambda.hXX @@ -23,26 +23,26 @@ #include #include -static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, Nd4jLong *shapeInfo) { +static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, const Nd4jLong *shapeInfo) { return shape::getIndexOffset(index, shapeInfo); } -static Nd4jLong __device__ __noinline__ length(Nd4jLong *shapeInfo) { +static Nd4jLong __device__ __noinline__ length(const Nd4jLong *shapeInfo) { return shape::length(shapeInfo); } -template static _CUDA_G void lambdaKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaIndexedKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaIndexedPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaTriplewiseKernel(void* vw, Nd4jLong *wShapeInfo, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); +template static 
_CUDA_G void lambdaKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaIndexedKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaIndexedPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaTriplewiseKernel(const void* vw, const Nd4jLong *wShapeInfo, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); template class LambdaHelper { public: template - FORCEINLINE static void lambdaLauncher(cudaStream_t *stream, void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -50,7 +50,7 @@ public: } template - FORCEINLINE static void lambdaIndexedLauncher(cudaStream_t *stream, void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaIndexedLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaIndexedKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -58,7 +58,7 @@ public: } template - FORCEINLINE static void lambdaPairwiseLauncher(cudaStream_t 
*stream, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaPairwiseLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaPairwiseKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -66,7 +66,7 @@ public: } template - FORCEINLINE static void lambdaIndexedPairwiseLauncher(cudaStream_t *stream, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaIndexedPairwiseLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaIndexedPairwiseKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -74,7 +74,7 @@ public: } template - FORCEINLINE static void lambdaTriplewiseLauncher(cudaStream_t *stream, void* vw, Nd4jLong *wShapeInfo, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaTriplewiseLauncher(cudaStream_t *stream,const void* vw, const Nd4jLong *wShapeInfo, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaTriplewiseKernel<<<256, 512, 1024, *stream>>>(vw, wShapeInfo, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -84,8 +84,8 @@ public: //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, 
Nd4jLong *zShapeInfo, Lambda lambda) { - auto x = reinterpret_cast(vx); +static _CUDA_G void lambdaKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -113,8 +113,8 @@ static _CUDA_G void lambdaKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jL //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaIndexedKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - auto x = reinterpret_cast(vx); +static _CUDA_G void lambdaIndexedKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -142,9 +142,9 @@ static _CUDA_G void lambdaIndexedKernel(void* vx, Nd4jLong *xShapeInfo, void *vz //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaIndexedPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); +static _CUDA_G void lambdaIndexedPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -175,9 +175,9 @@ static _CUDA_G void lambdaIndexedPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - 
auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); +static _CUDA_G void lambdaPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -208,10 +208,10 @@ static _CUDA_G void lambdaPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* v //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaTriplewiseKernel(void* vw, Nd4jLong *wShapeInfo, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - auto w = reinterpret_cast(vw); - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); +static _CUDA_G void lambdaTriplewiseKernel(const void* vw, const Nd4jLong *wShapeInfo, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto w = reinterpret_cast(vw); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto wEws = shape::elementWiseStride(wShapeInfo); @@ -271,7 +271,7 @@ void NDArray::applyPairwiseLambda(const NDArray& other, Lambda func, NDArray& ta //throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, target.dataType()); prepareSpecialUse({&target}, {this, &other}); - BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.specialBuffer(), 
other.specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); registerSpecialUse({&target}, {this, &other}); } @@ -298,7 +298,7 @@ void NDArray::applyIndexedPairwiseLambda(NDArray& other, Lambda func, NDArray& t throw std::runtime_error("NDArray::applyIndexedPairwiseLambda X/Y/Z data types must be the same"); prepareSpecialUse({&target}, {this, &other}); - BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); registerSpecialUse({&target}, {this, &other}); } diff --git a/libnd4j/include/array/ShapeList.h b/libnd4j/include/array/ShapeList.h index 2d0fde4ad..f0034ac81 100644 --- a/libnd4j/include/array/ShapeList.h +++ b/libnd4j/include/array/ShapeList.h @@ -28,26 +28,24 @@ namespace sd { class ND4J_EXPORT ShapeList { protected: - std::vector _shapes; + std::vector _shapes; bool _destroyed = false; bool _autoremovable = false; bool _workspace = false; public: - ShapeList(Nd4jLong* shape = nullptr); - ShapeList(std::initializer_list shapes); - ShapeList(std::initializer_list shapes, bool isWorkspace); - ShapeList(std::vector& shapes); + ShapeList(const Nd4jLong* shape = nullptr); + ShapeList(const std::vector &shapes, bool isWorkspace); + ShapeList(const std::vector& shapes); //ShapeList(bool autoRemovable); ~ShapeList(); - std::vector* asVector(); + std::vector* asVector(); void destroy(); - int size(); - Nd4jLong* at(int idx); - void push_back(Nd4jLong *shape); - void push_back(std::vector& shape); + int size() const; + const 
Nd4jLong* at(int idx); + void push_back(const Nd4jLong *shape); /** * PLEASE NOTE: This method should be called ONLY if shapes were generated at workspaces. Otherwise you'll get memory leak diff --git a/libnd4j/include/array/TadPack.h b/libnd4j/include/array/TadPack.h index 09b084548..3cd95fa59 100644 --- a/libnd4j/include/array/TadPack.h +++ b/libnd4j/include/array/TadPack.h @@ -28,18 +28,18 @@ namespace sd { private: ConstantDataBuffer _tadShape; ConstantDataBuffer _tadOffsets; - Nd4jLong _numTads; - int _shapeInfoLength; + Nd4jLong _numTads = 0 ; + int _shapeInfoLength = 0; public: explicit TadPack(ConstantDataBuffer &shapes, ConstantDataBuffer &offets, Nd4jLong numTads); TadPack() = default; ~TadPack() = default; - Nd4jLong* primaryShapeInfo() const; - Nd4jLong* primaryOffsets() const; + const Nd4jLong* primaryShapeInfo() const; + const Nd4jLong* primaryOffsets() const; - Nd4jLong* specialShapeInfo() const; - Nd4jLong* specialOffsets() const; + const Nd4jLong* specialShapeInfo() const; + const Nd4jLong* specialOffsets() const; Nd4jLong numberOfTads() const; int shapeInfoLength() const; @@ -48,8 +48,8 @@ namespace sd { * These methods return either primary or special pointers depending on platform binaries were compiled for * @return */ - Nd4jLong *platformShapeInfo() const; - Nd4jLong *platformOffsets() const; + const Nd4jLong *platformShapeInfo() const; + const Nd4jLong *platformOffsets() const; }; } diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 1d97ba61c..87369f740 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -52,10 +52,9 @@ namespace sd { //////////////////////////////////////////////////////////////////////// void* NDArray::platformBuffer() { return buffer(); } -void* NDArray::getPlatformBuffer() const { return getBuffer(); } +void const* NDArray::platformBuffer() const { return buffer(); } -Nd4jLong* NDArray::getPlatformShapeInfo() const { return 
getShapeInfo(); } -Nd4jLong* NDArray::platformShapeInfo() { return shapeInfo(); } +Nd4jLong const* NDArray::platformShapeInfo() const { return shapeInfo(); } void NDArray::syncToDevice() const { } void NDArray::syncToHost() const { } @@ -85,15 +84,15 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t upper = target.sizeAt(-1); const T value = static_cast(val); - const auto x = reinterpret_cast(getBuffer()); - auto z = reinterpret_cast(target.getBuffer()); + const auto x = reinterpret_cast(buffer()); + auto z = reinterpret_cast(target.buffer()); const int xRank = rankOf(); const int zRank = target.rankOf(); const auto zLen = target.lengthOf(); - const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target.getShapeInfo()); + const bool areSameOffsets = shape::haveSameShapeAndStrides(shapeInfo(), target.shapeInfo()); auto func = PRAGMA_THREADS_FOR { @@ -101,8 +100,8 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, target.getShapeInfo(), coords); - const auto zOffset = shape::getOffset(target.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, target.shapeInfo(), coords); + const auto zOffset = shape::getOffset(target.shapeInfo(), coords); // if( (row + upper < col) || (row + lower > col) ) if ((coords[zRank - 2] + upper < coords[zRank - 1]) || (coords[zRank - 2] + lower > coords[zRank - 1])) @@ -113,7 +112,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t coords[0] = coords[1]; } - const auto xOffset = areSameOffsets ? zOffset : shape::getOffset(getShapeInfo(), coords); + const auto xOffset = areSameOffsets ? 
zOffset : shape::getOffset(shapeInfo(), coords); z[zOffset] = x[xOffset]; if (xRank != zRank) // restore first coordinate @@ -140,7 +139,7 @@ void NDArray::setIdentity() { for(int j = 0; j < rank; ++j) indices[j] = 1; - Nd4jLong offset = shape::getOffset(getShapeInfo(), indices); + Nd4jLong offset = shape::getOffset(shapeInfo(), indices); for(int i = 0; i < rank; ++i) if(minDim > shape[i]) @@ -214,23 +213,28 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre } + //////////////////////////////////////////////////////////////////////// + void* NDArray::specialBufferWithOffset(Nd4jLong offset) { + return nullptr; + } + //////////////////////////////////////////////////////////////////////// -void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { +const void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { return nullptr; } //////////////////////////////////////////////////////////////////////// void* NDArray::specialBuffer() { if (_buffer->special() == nullptr) - return getBuffer(); + return buffer(); // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } //////////////////////////////////////////////////////////////////////// -void* NDArray::getSpecialBuffer() const { +void const* NDArray::specialBuffer() const { if (_buffer->special() == nullptr) - return getBuffer(); + return buffer(); // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } @@ -253,7 +257,7 @@ NDArray NDArray::tile(const std::vector& reps) const { NDArray result(*this); if(diff < 0) { // reshape to higher dimension std::vector shapeNew = reps; // there is requirement to have unities at first "diff" positions of new shape - memcpy(&shapeNew[-diff], result.getShapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions + memcpy(&shapeNew[-diff], result.shapeInfo()+1, rankOld * 
sizeof(Nd4jLong)); // put old shape numbers at rest of positions result.reshapei(ordering(), shapeNew); } return result; // nothing to do, if diff >= 0 -> identity tile @@ -274,8 +278,8 @@ NDArray NDArray::tile(const std::vector& reps) const { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo()); - BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.getBuffer(), i, this->getBuffer(), yOffset), LIBND4J_TYPES); + auto yOffset = shape::subArrayOffset(i, newShapeInfo, shapeInfo()); + BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.buffer(), i, this->buffer(), yOffset), LIBND4J_TYPES); } }; @@ -286,8 +290,8 @@ NDArray NDArray::tile(const std::vector& reps) const { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { auto xOffset = result.getOffset(i); - auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo()); - BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.getBuffer(), xOffset, this->getBuffer(), yOffset), LIBND4J_TYPES); + auto yOffset = shape::subArrayOffset(i, newShapeInfo, shapeInfo()); + BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.buffer(), xOffset, this->buffer(), yOffset), LIBND4J_TYPES); } }; @@ -307,7 +311,7 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { // evaluate true tile shapeInfo for comparison with target shapeInfo auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace()); - if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) { + if(!shape::equalsSoft(newShapeInfo, target.shapeInfo())) { delete []newShapeInfo; throw std::runtime_error("NDArray::tile method - shapeInfo of target array is not suitable for tile operation !"); } @@ -319,14 +323,14 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here 
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided) for(Nd4jLong i=0; i 1) { for(Nd4jLong i=0; i& reps, NDArray& target) const { for(Nd4jLong i=0; i= 1) { for(Nd4jLong i=0; i(this)->setShapeInfo(this->getShapeInfo()); + const_cast(this)->setShapeInfo(this->shapeInfo()); // now we actually migrate data buffer _buffer->migrate(); @@ -142,7 +142,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t PointersManager manager(getContext(), "NDArray::fillAsTriangular"); NDArray::prepareSpecialUse({&target}, {this}); - fillAsTriangularCuda<<getCudaStream()>>>(getPlatformBuffer(), getPlatformShapeInfo(), target.getPlatformBuffer(), target.getPlatformShapeInfo(), static_cast(val), lower, upper); + fillAsTriangularCuda<<getCudaStream()>>>(platformBuffer(), platformShapeInfo(), target.platformBuffer(), target.platformShapeInfo(), static_cast(val), lower, upper); NDArray::registerSpecialUse({&target}, {this}); manager.synchronize(); @@ -206,7 +206,7 @@ void NDArray::setIdentity() { PointersManager manager(getContext(), "NDArray::setIdentity"); syncToDevice(); - BUILD_SINGLE_SELECTOR(dataType(), identityMatrixCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getPlatformBuffer(), getPlatformShapeInfo(), 1.f), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(dataType(), identityMatrixCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), platformBuffer(), platformShapeInfo(), 1.f), LIBND4J_TYPES); tickWriteDevice(); manager.synchronize(); @@ -293,12 +293,16 @@ void NDArray::registerPrimaryUse(const std::vector& writeList, c ////////////////////////////////////////////////////////////////////////// void NDArray::syncShape() const { - cudaMemcpy(getSpecialShapeInfo(), getShapeInfo(), shape::shapeInfoByteLength(getShapeInfo()), cudaMemcpyHostToDevice); + cudaMemcpy(const_cast(specialShapeInfo()), shapeInfo(), 
shape::shapeInfoByteLength(shapeInfo()), cudaMemcpyHostToDevice); } ////////////////////////////////////////////////////////////////////////// -void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { - return getSpecialBuffer() != nullptr ? static_cast(getSpecialBuffer()) + (offset * sizeOfT()) : nullptr; +void const* NDArray::specialBufferWithOffset(Nd4jLong offset) const { + return specialBuffer() != nullptr ? static_cast(specialBuffer()) + (offset * sizeOfT()) : nullptr; +} + +void* NDArray::specialBufferWithOffset(Nd4jLong offset){ + return specialBuffer() != nullptr ? static_cast(specialBuffer()) + (offset * sizeOfT()) : nullptr; } ////////////////////////////////////////////////////////////////////////// @@ -318,7 +322,7 @@ NDArray NDArray::tile(const std::vector& reps) const { NDArray result(*this); if(diff < 0) { // reshape to higher dimension std::vector shapeNew = reps; // need to have unities at first "diff" positions of new shape - memcpy(&shapeNew[-diff], result.getShapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions + memcpy(&shapeNew[-diff], result.shapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions result.reshapei(ordering(), shapeNew); } return result; // nothing to do, if diff >= 0 -> identity tile @@ -332,13 +336,13 @@ NDArray NDArray::tile(const std::vector& reps) const { NDArray result(newBuff, ShapeDescriptor(newShapeInfo), getContext()); // fill newBuff, loop through all elements of newBuff - // looping through getBuffer() goes automatically by means of getSubArrayIndex applying + // looping through buffer() goes automatically by means of getSubArrayIndex applying const auto resultLen = result.lengthOf(); auto xType = this->dataType(); auto stream = getContext()->getCudaStream(); prepareSpecialUse({&result}, {this}); - BUILD_SINGLE_SELECTOR(xType, tileKernelH, (this->getSpecialBuffer(), this->getSpecialShapeInfo(), result.getSpecialBuffer(), 
result.getSpecialShapeInfo(), resultLen, stream), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, tileKernelH, (this->specialBuffer(), this->specialShapeInfo(), result.specialBuffer(), result.specialShapeInfo(), resultLen, stream), LIBND4J_TYPES); registerSpecialUse({&result}, {this}); return result; @@ -354,18 +358,18 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { // evaluate true tile shapeInfo for comparison with target shapeInfo auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace()); - if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) { + if(!shape::equalsSoft(newShapeInfo, target.shapeInfo())) { throw std::runtime_error("NDArray::tile method - shapeInfo of target array is not suitable for tile operation !"); } // fill newBuff, loop through all elements of newBuff - // looping through getBuffer() goes automatically by means of getSubArrayIndex applying + // looping through buffer() goes automatically by means of getSubArrayIndex applying const int ews = target.ews(); const int targetLen = target.lengthOf(); auto stream = getContext()->getCudaStream(); prepareSpecialUse({&target}, {this}); - BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (getSpecialBuffer(), getSpecialShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (specialBuffer(), specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); registerSpecialUse({&target}, {this}); } @@ -384,7 +388,7 @@ void NDArray::tile(NDArray& target) const { auto stream = getContext()->getCudaStream(); prepareSpecialUse({&target}, {this}); - BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (getSpecialBuffer(), getSpecialShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); + 
BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (specialBuffer(), specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); registerSpecialUse({&target}, {this}); } @@ -467,7 +471,7 @@ NDArray NDArray::repeat(const int axis, const std::vector& repeats) const { const int* reps = reinterpret_cast(manager.replicatePointer(repeats.data(), repeats.size() * sizeof(int))); prepareSpecialUse({&output}, {this}); - BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), specialBuffer(), specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES); prepareSpecialUse({&output}, {this}); manager.synchronize(); @@ -491,7 +495,7 @@ void NDArray::repeat(const int axis, const std::vector& repeats, NDArray& t const int* reps = reinterpret_cast(manager.replicatePointer(repeats.data(), repeats.size() * sizeof(int))); prepareSpecialUse({&target}, {this}); - BUILD_DOUBLE_SELECTOR(dataType(), target.dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(dataType(), target.dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), specialBuffer(), specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES, LIBND4J_TYPES); prepareSpecialUse({&target}, {this}); manager.synchronize(); @@ -501,16 
+505,20 @@ void NDArray::repeat(const int axis, const std::vector& repeats, NDArray& t //////////////////////////////////////////////////////////////////////// void* NDArray::specialBuffer() { - if (_buffer->special() == nullptr) - return getBuffer(); + if (_buffer->special() == nullptr) { + syncToDevice(); + tickReadHost(); + } // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } //////////////////////////////////////////////////////////////////////// -void* NDArray::getSpecialBuffer() const { - if (_buffer->special() == nullptr) - return getBuffer(); +void const* NDArray::specialBuffer() const { + if (_buffer->special() == nullptr) { + syncToDevice(); + tickReadHost(); + } // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } @@ -526,7 +534,7 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre printf("%s", msg); if(host) { - if(getBuffer() == nullptr || _length == 0) + if(buffer() == nullptr || _length == 0) { printf("NDArray::printActualBuffer: host buffer is nullptr !\n"); return; } const T* buff = bufferAsT(); @@ -535,7 +543,7 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre printf("\n"); } else { - if(getSpecialBuffer() == nullptr || _length == 0) + if(specialBuffer() == nullptr || _length == 0) { printf("NDArray::printSpecialBuffer: special buffer is nullptr !\n"); return; } void* pHost = operator new(sizeof(T) * _length); @@ -545,7 +553,7 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre cudaMemcpyAsync(reinterpret_cast(pHost) + i, specialBufferWithOffset(i), sizeof(T), cudaMemcpyDeviceToHost, *(getContext()->getCudaStream())); } else - cudaMemcpyAsync(pHost, getSpecialBuffer(), sizeOfT() * _length, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); + cudaMemcpyAsync(pHost, specialBuffer(), sizeOfT() * _length, 
cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); cudaError_t cudaResult = cudaStreamSynchronize(*getContext()->getCudaStream()); if(cudaResult != 0) diff --git a/libnd4j/include/array/impl/ConstantDescriptor.cpp b/libnd4j/include/array/impl/ConstantDescriptor.cpp index ebb27090d..829ac5b34 100644 --- a/libnd4j/include/array/impl/ConstantDescriptor.cpp +++ b/libnd4j/include/array/impl/ConstantDescriptor.cpp @@ -28,7 +28,7 @@ namespace sd { _floatValues.emplace_back(values[e]); } - ConstantDescriptor::ConstantDescriptor(Nd4jLong * values, int length) { + ConstantDescriptor::ConstantDescriptor(Nd4jLong const* values, int length) { for (int e = 0; e < length; e++) _integerValues.emplace_back(values[e]); } diff --git a/libnd4j/include/array/impl/NDArrayFactory.cpp b/libnd4j/include/array/impl/NDArrayFactory.cpp index 870fdc198..f14aa9dbb 100644 --- a/libnd4j/include/array/impl/NDArrayFactory.cpp +++ b/libnd4j/include/array/impl/NDArrayFactory.cpp @@ -417,7 +417,7 @@ NDArray NDArrayFactory::create(const std::vector &values, sd::LaunchContext * NDArray res(buffer, ShapeDescriptor::vectorDescriptor(values.size(), DataTypeUtils::fromT()), context); - memcpyFromVector(res.getBuffer(), values); + memcpyFromVector(res.buffer(), values); res.tickWriteHost(); res.syncToDevice(); diff --git a/libnd4j/include/array/impl/NDArrayList.cpp b/libnd4j/include/array/impl/NDArrayList.cpp index ecd4bcaca..1aa9d2d4b 100644 --- a/libnd4j/include/array/impl/NDArrayList.cpp +++ b/libnd4j/include/array/impl/NDArrayList.cpp @@ -153,7 +153,7 @@ namespace sd { inputs[e] = _chunks[e]; } - auto inShapeInfo = inputs[0]->getShapeInfo(); + auto inShapeInfo = inputs[0]->shapeInfo(); int rank = shape::rank(inShapeInfo); NDArray* array = nullptr; diff --git a/libnd4j/include/array/impl/ShapeList.cpp b/libnd4j/include/array/impl/ShapeList.cpp index 1a883cc7e..d26132516 100644 --- a/libnd4j/include/array/impl/ShapeList.cpp +++ b/libnd4j/include/array/impl/ShapeList.cpp @@ -26,7 +26,7 @@ namespace sd 
{ // _autoremovable = autoRemovable; // } - ShapeList::ShapeList(Nd4jLong* shape) { + ShapeList::ShapeList(const Nd4jLong* shape) { if (shape != nullptr) _shapes.push_back(shape); } @@ -36,21 +36,15 @@ namespace sd { destroy(); } - ShapeList::ShapeList(std::initializer_list shapes) { - for (auto v:shapes) - _shapes.push_back(v); - } - - ShapeList::ShapeList(std::initializer_list shapes, bool isWorkspace) : ShapeList(shapes){ + ShapeList::ShapeList(const std::vector &shapes, bool isWorkspace) : ShapeList(shapes){ _workspace = isWorkspace; } - ShapeList::ShapeList(std::vector& shapes) { - for (auto v:shapes) - _shapes.push_back(v); + ShapeList::ShapeList(const std::vector& shapes) { + _shapes = shapes; } - std::vector* ShapeList::asVector() { + std::vector* ShapeList::asVector() { return &_shapes; } @@ -66,33 +60,21 @@ namespace sd { _destroyed = true; } - int ShapeList::size() { + int ShapeList::size() const { return (int) _shapes.size(); } - Nd4jLong* ShapeList::at(int idx) { + const Nd4jLong* ShapeList::at(int idx) { if (_shapes.size() <= idx) throw std::runtime_error("Can't find requested variable by index"); return _shapes.at(idx); } - void ShapeList::push_back(Nd4jLong *shape) { + void ShapeList::push_back(const Nd4jLong *shape) { _shapes.push_back(shape); } - void ShapeList::push_back(std::vector& shape) { - int dLen = shape::shapeInfoLength(shape.at(0)); - - if (shape.size() != dLen) - throw std::runtime_error("Bad shape was passed in"); - - auto nShape = new Nd4jLong[dLen]; - std::memcpy(nShape, shape.data(), shape::shapeInfoByteLength(shape.at(0))); - - _shapes.push_back(nShape); - } - void ShapeList::detach() { for (int e = 0; e < _shapes.size(); e++) { _shapes[e] = shape::detachShape(_shapes[e]); diff --git a/libnd4j/include/array/impl/TadPack.cpp b/libnd4j/include/array/impl/TadPack.cpp index 1bd5b8f70..7a3bdbe36 100644 --- a/libnd4j/include/array/impl/TadPack.cpp +++ b/libnd4j/include/array/impl/TadPack.cpp @@ -29,18 +29,19 @@ namespace sd { _numTads = 
numTads; } - Nd4jLong* TadPack::primaryShapeInfo() const { + const Nd4jLong* TadPack::primaryShapeInfo() const { return reinterpret_cast(_tadShape.primary()); } - Nd4jLong* TadPack::primaryOffsets() const { + + const Nd4jLong* TadPack::primaryOffsets() const { return reinterpret_cast(_tadOffsets.primary()); } - Nd4jLong* TadPack::specialShapeInfo() const { + const Nd4jLong* TadPack::specialShapeInfo() const { return reinterpret_cast(_tadShape.special()); } - Nd4jLong* TadPack::specialOffsets() const { + const Nd4jLong* TadPack::specialOffsets() const { return reinterpret_cast(_tadOffsets.special()); } @@ -48,11 +49,11 @@ namespace sd { return _numTads; } - Nd4jLong* TadPack::platformShapeInfo() const { + const Nd4jLong* TadPack::platformShapeInfo() const { return sd::Environment::getInstance()->isCPU() ? primaryShapeInfo() : specialShapeInfo(); } - Nd4jLong* TadPack::platformOffsets() const { + const Nd4jLong* TadPack::platformOffsets() const { return sd::Environment::getInstance()->isCPU() ? 
primaryOffsets() : specialOffsets(); } diff --git a/libnd4j/include/graph/Context.h b/libnd4j/include/graph/Context.h index 96d7e8b12..de6608b46 100644 --- a/libnd4j/include/graph/Context.h +++ b/libnd4j/include/graph/Context.h @@ -196,12 +196,14 @@ namespace sd { #endif void setInputArray(int index, NDArray *array, bool removable = false); - void setInputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo); - void setInputArray(int index, void *databuffer, void *shapeInfo, void *specialShapeInfo); + void setInputArray(int index, void *buffer, void const* shapeInfo, void *specialBuffer, void const* specialShapeInfo); + void setInputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo); + void setInputArray(int index, void *databuffer, void const* shapeInfo, void const* specialShapeInfo); void setOutputArray(int index, NDArray *array, bool removable = false); - void setOutputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo); - void setOutputArray(int index, void *databuffer, void *shapeInfo, void *specialShapeInfo); + void setOutputArray(int index, void *buffer, const void * shapeInfo, void *specialBuffer, const void * specialShapeInfo); + void setOutputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo); + void setOutputArray(int index, void *databuffer, void const* shapeInfo, void const* specialShapeInfo); void setTArguments(double *arguments, int numberOfArguments); void setIArguments(Nd4jLong *arguments, int numberOfArguments); diff --git a/libnd4j/include/graph/impl/Context.cpp b/libnd4j/include/graph/impl/Context.cpp index 954329f42..ae5bc59a0 100644 --- a/libnd4j/include/graph/impl/Context.cpp +++ b/libnd4j/include/graph/impl/Context.cpp @@ -407,8 +407,12 @@ namespace sd { _handles.emplace_back(array); } - void Context::setInputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, 
void *specialShapeInfo) { - auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); + void Context::setInputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo) { + this->setInputArray(index, buffer, const_cast(shapeInfo), specialBuffer, const_cast(specialShapeInfo)); + } + + void Context::setInputArray(int index, void *buffer, void const* shapeInfo, void *specialBuffer, void const* specialShapeInfo) { + auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); if (_fastpath_in.size() < index + 1) _fastpath_in.resize(index+1); @@ -430,11 +434,15 @@ namespace sd { _handles.emplace_back(array); } - void Context::setOutputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo) { + void Context::setOutputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo) { + this->setOutputArray(index, buffer, const_cast(shapeInfo), specialBuffer, const_cast(specialShapeInfo)); + } + + void Context::setOutputArray(int index, void *buffer, const void * shapeInfo, void *specialBuffer, const void * specialShapeInfo) { if (_fastpath_out.size() < index + 1) _fastpath_out.resize(index+1); - auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); + auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); _fastpath_out[index] = array; _handles.emplace_back(array); @@ -443,7 +451,7 @@ namespace sd { array->setContext(_context); } - void Context::setInputArray(int index, void *vdatabuffer, void *shapeInfo, void *specialShapeInfo) { + void Context::setInputArray(int index, void *vdatabuffer, void const* shapeInfo, void const* specialShapeInfo) { auto dataBuffer = reinterpret_cast(vdatabuffer); if (_fastpath_in.size() < index + 1) @@ -451,9 +459,9 @@ namespace sd { NDArray *array; if (dataBuffer != nullptr) - array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), 
sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); + array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); else - array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); + array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); _fastpath_in[index] = array; _handles.emplace_back(array); @@ -462,7 +470,7 @@ namespace sd { array->setContext(_context); } - void Context::setOutputArray(int index, void *vdatabuffer, void *shapeInfo, void *specialShapeInfo) { + void Context::setOutputArray(int index, void *vdatabuffer, void const* shapeInfo, void const* specialShapeInfo) { auto dataBuffer = reinterpret_cast(vdatabuffer); if (_fastpath_out.size() < index + 1) @@ -470,9 +478,9 @@ namespace sd { NDArray *array; if (dataBuffer != nullptr) - array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); + array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); else - array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); + array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); _fastpath_out[index] = array; _handles.emplace_back(array); diff --git a/libnd4j/include/graph/impl/Graph.cpp b/libnd4j/include/graph/impl/Graph.cpp index 15db128a8..177adbe07 100644 --- a/libnd4j/include/graph/impl/Graph.cpp +++ b/libnd4j/include/graph/impl/Graph.cpp @@ -50,8 +50,8 @@ namespace sd { Nd4jLong result = 0L; Nd4jLong lastStep = 0L; - std::vector shapes; - MAP_IMPL, Nd4jLong*> shapesMap; + std::vector 
shapes; + MAP_IMPL, Nd4jLong const*> shapesMap; int cntFD = 0; @@ -83,12 +83,12 @@ namespace sd { auto in = node->input()->at(0); auto block = node->getContextPrototype(); - std::vector inputShapes; + std::vector inputShapes; int *oldShape; for (auto v: *node->input()) { nd4j_debug(" inputs for estimation are: %i:%i\n", v.first, v.second); if (v.first < 0) { - inputShapes.push_back(_variableSpace->getVariable(v.first)->getNDArray()->getShapeInfo()); + inputShapes.push_back(_variableSpace->getVariable(v.first)->getNDArray()->shapeInfo()); } else { inputShapes.push_back(shapesMap.at(v)); } @@ -102,7 +102,7 @@ namespace sd { int cnt = 0; for (auto newShape: *outSha->asVector()) { std::pair pairAddr(node->id(), cnt++); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); @@ -122,11 +122,11 @@ namespace sd { auto x = _variableSpace->getVariable(in); auto z = _variableSpace->getVariable(node->id()); - auto newShape = new Nd4jLong[shape::shapeInfoLength(x->getNDArray()->getShapeInfo())]; - memcpy(newShape, x->getNDArray()->getShapeInfo(), shape::shapeInfoByteLength(x->getNDArray()->getShapeInfo())); + auto newShape = new Nd4jLong[shape::shapeInfoLength(x->getNDArray()->shapeInfo())]; + memcpy(newShape, x->getNDArray()->shapeInfo(), shape::shapeInfoByteLength(x->getNDArray()->shapeInfo())); std::pair pairAddr(node->id(), 0); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); @@ -141,7 +141,7 @@ namespace sd { memcpy(newShape, prevShape, shape::shapeInfoByteLength(prevShape)); std::pair pairAddr(node->id(), 0); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); @@ -152,30 +152,30 @@ namespace sd { } } else if (node->getOpClass() == OpClass_REDUCTION) { - Nd4jLong *newShape = nullptr; + Nd4jLong 
const* newShape = nullptr; // if that's scalar output - we don't care about previous node if (node->getDimensions()->size() == 0 || (node->getDimensions()->size() == 1 && node->getDimensions()->at(0) == sd::DataTypeUtils::max())) { - newShape = new Nd4jLong[8]; - - newShape[0] = 2; - newShape[1] = 1; - newShape[2] = 1; - newShape[3] = 1; - newShape[4] = 1; - newShape[5] = 8192; // set type as FLOAT32 by default - newShape[6] = 1; - newShape[7] = 99; - +// auto aNewShape = new Nd4jLong[8]; +// +// aNewShape[0] = 2; +// aNewShape[1] = 1; +// aNewShape[2] = 1; +// aNewShape[3] = 1; +// aNewShape[4] = 1; +// aNewShape[5] = 8192; // set type as FLOAT32 by default +// aNewShape[6] = 1; +// aNewShape[7] = 99; + newShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, 'c', {1,1}); } else { auto in = node->input()->at(0); - Nd4jLong *oldShape = nullptr; + Nd4jLong const* oldShape = nullptr; // calculate tads here if (in.first < 0) { auto x = _variableSpace->getVariable(in)->getNDArray(); - oldShape = x->getShapeInfo(); + oldShape = x->shapeInfo(); } else { oldShape = shapesMap.at(in); @@ -188,7 +188,7 @@ namespace sd { } std::pair pairAddr(node->id(), 0); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); diff --git a/libnd4j/include/graph/profiling/NodeProfile.h b/libnd4j/include/graph/profiling/NodeProfile.h index 871eb5748..83f0b88fc 100644 --- a/libnd4j/include/graph/profiling/NodeProfile.h +++ b/libnd4j/include/graph/profiling/NodeProfile.h @@ -88,8 +88,8 @@ namespace sd { void setObjectsSize(Nd4jLong bytes); void setTotalSize(Nd4jLong bytes); - void addInputShape(Nd4jLong *shapeInfo); - void addOutputShape(Nd4jLong *shapeInfo); + void addInputShape(Nd4jLong const* shapeInfo); + void addOutputShape(Nd4jLong const* shapeInfo); Nd4jLong getActivationsSize() const; Nd4jLong getTemporarySize() const; diff --git 
a/libnd4j/include/graph/profiling/impl/NodeProfile.cpp b/libnd4j/include/graph/profiling/impl/NodeProfile.cpp index bd48fbd28..8db4472e6 100644 --- a/libnd4j/include/graph/profiling/impl/NodeProfile.cpp +++ b/libnd4j/include/graph/profiling/impl/NodeProfile.cpp @@ -116,11 +116,11 @@ namespace sd { return _executionTime; } - void NodeProfile::addInputShape(Nd4jLong *shapeInfo) { + void NodeProfile::addInputShape(Nd4jLong const* shapeInfo) { _inputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo)); } - void NodeProfile::addOutputShape(Nd4jLong *shapeInfo) { + void NodeProfile::addOutputShape(Nd4jLong const*shapeInfo) { _outputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo)); } diff --git a/libnd4j/include/helpers/ConstantShapeHelper.h b/libnd4j/include/helpers/ConstantShapeHelper.h index 4454776a4..73281c507 100644 --- a/libnd4j/include/helpers/ConstantShapeHelper.h +++ b/libnd4j/include/helpers/ConstantShapeHelper.h @@ -51,20 +51,20 @@ namespace sd { ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape); ConstantDataBuffer bufferForShapeInfo(const ShapeDescriptor &descriptor); ConstantDataBuffer bufferForShapeInfo(const Nd4jLong *shapeInfo); - ConstantDataBuffer bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape); - ConstantDataBuffer createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector dimensions = {}); + ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); + ConstantDataBuffer createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector &dimensions = {}); - Nd4jLong* emptyShapeInfo(const sd::DataType dataType); - Nd4jLong* scalarShapeInfo(const sd::DataType dataType); - Nd4jLong* 
vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType); - Nd4jLong* createShapeInfo(const ShapeDescriptor &descriptor); - Nd4jLong* createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape); - Nd4jLong* createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape); - Nd4jLong* createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo); + const Nd4jLong* emptyShapeInfo(sd::DataType dataType); + const Nd4jLong* scalarShapeInfo(sd::DataType dataType); + const Nd4jLong* vectorShapeInfo(Nd4jLong length, sd::DataType dataType); + const Nd4jLong* createShapeInfo(const ShapeDescriptor &descriptor); + const Nd4jLong* createShapeInfo(sd::DataType dataType, char order, const std::vector &shape); + const Nd4jLong* createShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); + const Nd4jLong* createShapeInfo(sd::DataType dataType, const Nd4jLong* shapeInfo); - Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace); - Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal = true); + const Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace); + const Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal = true); bool checkBufferExistenceForShapeInfo(ShapeDescriptor &descriptor); diff --git a/libnd4j/include/helpers/Loops.h b/libnd4j/include/helpers/Loops.h index 508b84f20..f18bcc63d 100644 --- a/libnd4j/include/helpers/Loops.h +++ b/libnd4j/include/helpers/Loops.h @@ -41,43 +41,43 @@ namespace sd { public: template - static FORCEINLINE void loopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, E* extraParams, int64_t start, int64_t stop); + static FORCEINLINE void loopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, E* 
extraParams, int64_t start, int64_t stop); }; template class ReductionFloatLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); }; template class ND4J_EXPORT ReductionBoolLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); }; template class ND4J_EXPORT ReductionLongLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* 
tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); }; template class ND4J_EXPORT ReductionSameLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, X* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, X* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); }; @@ -85,10 +85,10 @@ namespace sd { class ND4J_EXPORT IndexReductionLoops { private: public: - static void wrapIndexReduce(const int opNum, void* x, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* extraParams); + static void wrapIndexReduce(int opNum, const void* x, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* 
extraParams); template - static void loopIndexReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams); + static void loopIndexReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams); }; @@ -98,7 +98,7 @@ namespace sd { public: template - static FORCEINLINE void loopTransform(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, E* extraParams, uint64_t threadId, uint64_t numThreads); + static FORCEINLINE void loopTransform(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, E* extraParams, uint64_t threadId, uint64_t numThreads); }; template @@ -106,20 +106,20 @@ namespace sd { public: template - static FORCEINLINE void loopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); + static FORCEINLINE void loopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); template - static FORCEINLINE void loopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); + static FORCEINLINE void loopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); + 
static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); - static void wrapperAll(const int opNum, X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void wrapperAll(int opNum, const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); + static void innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); }; @@ -263,10 +263,11 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::ReductionLoops::loopReduce(X* x, Nd4jLong* xShapeInfo, - Z* 
z, Nd4jLong* zShapeInfo, - Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, - E* extraParams, int64_t start, int64_t stop) { + void sd::ReductionLoops::loopReduce(const X* x, const Nd4jLong* xShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, + E* extraParams, + int64_t start, int64_t stop) { const LoopKind::Kind kindOfLoop = LoopKind::deduceKindOfLoopTadXZ(xShapeInfo, zShapeInfo, tadShapeInfo); @@ -492,9 +493,10 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::TransformLoops::loopTransform(X* x, Nd4jLong* xShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - E* extraParams, uint64_t threadId, uint64_t numThreads) { + void sd::TransformLoops::loopTransform(const X* x, const Nd4jLong* xShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + E* extraParams, + uint64_t threadId, uint64_t numThreads) { const LoopKind::Kind kindOfLoop = LoopKind::deduceKindOfLoopXZ(xShapeInfo, zShapeInfo); @@ -682,11 +684,11 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::Reduction3Loops::loopReduce3(X* x, Nd4jLong* xShapeInfo, - X* y, Nd4jLong* yShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - int* dims, int dimsLen, - Z* extraParameters, int64_t start, int64_t stop) { + void sd::Reduction3Loops::loopReduce3(const X* x, const Nd4jLong* xShapeInfo, + const X* y, const Nd4jLong* yShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + int* dims, int dimsLen, + Z* extraParameters, int64_t start, int64_t stop) { // both tads have same shape, however strides and ews may differ @@ -695,7 +697,7 @@ namespace sd { const Nd4jLong xLen = shape::length(xShapeInfo); const Nd4jLong yLen = shape::length(yShapeInfo); - Nd4jLong* xTadShapeInfo = nullptr, * yTadShapeInfo = nullptr, * xTadOffsets = nullptr, * yTadOffsets = nullptr; + const Nd4jLong* xTadShapeInfo = nullptr, * yTadShapeInfo = nullptr, * xTadOffsets = nullptr, * 
yTadOffsets = nullptr; TadPack tadPackX, tadPackY; std::vector zeroOffsets; @@ -962,12 +964,13 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::Reduction3Loops::loopReduce3All(X* x, Nd4jLong* xShapeInfo, - X* y, Nd4jLong* yShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, - Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, - Z* extraParameters, int64_t start, int64_t stop) { + void sd::Reduction3Loops::loopReduce3All(const X* x, const Nd4jLong* xShapeInfo, + const X* y, const Nd4jLong* yShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, + const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, + Z* extraParameters, + int64_t start, int64_t stop) { // both tads have same shape, however strides and ews may differ diff --git a/libnd4j/include/helpers/ShapeUtils.h b/libnd4j/include/helpers/ShapeUtils.h index 8d2a119c3..cb2faa43d 100644 --- a/libnd4j/include/helpers/ShapeUtils.h +++ b/libnd4j/include/helpers/ShapeUtils.h @@ -35,28 +35,28 @@ namespace sd { static std::vector evalShapeForTensorDot(const NDArray* a, const NDArray* b, const std::vector& axesA, const std::vector& axesB, std::vector& permutAt, std::vector& permutBt, std::vector& shapeAt, std::vector& shapeBt); // evaluate resulting shape after reduce operation - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const 
NDArray& arr, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); /** * evaluate output shape for reduce operation when input shape is empty * behavior is analogous to tf */ - static Nd4jLong* evalReduceShapeInfoEmpty(const char order, std::vector& dimensions, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace); + static const Nd4jLong* evalReduceShapeInfoEmpty(const char order, std::vector& dimensions, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace); // evaluate shape for array which is result of repeat operation applied to arr static std::vector evalRepeatShape(int axis, const std::vector& repeats, const NDArray& arr); // evaluate shapeInfo 
of permuted array // if setContigStrides = true, then set contiguous strides in output shapeInfo in accordance with arr order - static Nd4jLong* evalPermShapeInfo(const int* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); - static Nd4jLong* evalPermShapeInfo(const Nd4jLong* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace); + static const Nd4jLong* evalPermShapeInfo(const int* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); + static const Nd4jLong* evalPermShapeInfo(const Nd4jLong* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace); // evaluate shapeInfo of transposed array // if setContigStrides = true, then set contiguous strides in output shapeInfo in accordance with arr order - static Nd4jLong* evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); + static const Nd4jLong* evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); static bool copyVectorPart(std::vector& target, std::vector& source, int rank, int offset); @@ -67,13 +67,13 @@ namespace sd { // check whether 2 arrays have mutually broadcastable shapes // shape comparison starts from the end static bool areShapesBroadcastable(const NDArray &arr1, const NDArray &arr2); - static bool areShapesBroadcastable(Nd4jLong* shapeX, Nd4jLong* shapeY); + static bool areShapesBroadcastable(const Nd4jLong* shapeX, const Nd4jLong* shapeY); static bool areShapesBroadcastable(const std::vector& shape1, const std::vector& shape2); // check the possibility of broadcast operation, if true then return shapeInfo of resulting array // if evalMinMax == false then array with larger rank has to be passed as first argument - static bool evalBroadcastShapeInfo(const NDArray& max, const NDArray& min, const bool 
evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); - static bool evalBroadcastShapeInfo(Nd4jLong *max, Nd4jLong *min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); + static bool evalBroadcastShapeInfo(const NDArray& max, const NDArray& min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); + static bool evalBroadcastShapeInfo(const Nd4jLong *max, const Nd4jLong *min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); // evaluate sorted vector of max axes to create tads along in case of simple broadcast operation // if simple broadcast is not possible then empty vector is returned @@ -88,10 +88,10 @@ namespace sd { static std::vector getDimsWithSameShape(const NDArray& max, const NDArray& min); // evaluate shapeInfo for resulting array of tile operation - static Nd4jLong* evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace); + static const Nd4jLong* evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace); // returns shape part of shapeInfo as std::vector - static std::vector pullShapeFromShapeInfo(Nd4jLong *shapeInfo); + static std::vector pullShapeFromShapeInfo(const Nd4jLong *shapeInfo); static std::string shapeAsString(const NDArray* array); static std::string shapeAsString(const std::vector& shape); @@ -104,13 +104,13 @@ namespace sd { static std::vector shapeAsVector(const Nd4jLong* shapeInfo); // evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal - static Nd4jLong* evalDiagShapeInfo(const Nd4jLong* shapeInfo, sd::memory::Workspace* workspace); + static const Nd4jLong* evalDiagShapeInfo(const Nd4jLong* shapeInfo, sd::memory::Workspace* workspace); static std::vector evalBroadcastBackwardAxis(const Nd4jLong *operand, const Nd4jLong *result); // utility to calculate matrix product shape with give 
source shapes and additional params // returns ShapeList pointer with result shape - static Nd4jLong* matrixProductShape(Nd4jLong* theFirstShape, Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace); + static const Nd4jLong* matrixProductShape(const Nd4jLong* theFirstShape, const Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace); /** * This method evaluates permutation vector necessary for reducing of shapeFrom to shapeTo diff --git a/libnd4j/include/helpers/TAD.h b/libnd4j/include/helpers/TAD.h index 6df5a05a2..cd58e421e 100644 --- a/libnd4j/include/helpers/TAD.h +++ b/libnd4j/include/helpers/TAD.h @@ -55,20 +55,20 @@ namespace shape { Nd4jLong tadIndex = 0; int dimensionLength; int* dimension = nullptr; - Nd4jLong *shapeInfo = nullptr; - Nd4jLong *tadOnlyShapeInfo = nullptr; + Nd4jLong const* shapeInfo = nullptr; + Nd4jLong* tadOnlyShapeInfo = nullptr; Nd4jLong numTads = 0; int tadRank = 0; - Nd4jLong *tadShape = nullptr; - Nd4jLong *tadStride = nullptr; - Nd4jLong *tadOffsets = nullptr; + Nd4jLong* tadShape = nullptr; + Nd4jLong* tadStride = nullptr; + Nd4jLong* tadOffsets = nullptr; Nd4jLong tadOffsetForBlock = 0; int rank = 0; int numOnes = 0; //pointers to original int originalDimensionLength; - int *originalDimension = nullptr; - Nd4jLong *originalShapeInfo = nullptr; + int const* originalDimension = nullptr; + Nd4jLong const* originalShapeInfo = nullptr; bool squeezed = false; bool newSqueezeDimensions = false; int numOnesInMiddle = 0; @@ -81,7 +81,7 @@ namespace shape { void *ptrManager = nullptr; int *ptrOutput = nullptr; - INLINEDEF bool dimensionsDescending(int rank, int *dimensions, int length); + INLINEDEF bool dimensionsDescending(int rank, int const* dimensions, int length); #ifdef __CUDACC__ __host__ __device__ @@ -114,12 +114,12 @@ namespace shape { #ifdef __CUDACC__ __host__ 
__device__ #endif - INLINEDEF void init(Nd4jLong *shapeInfo,int *dimension,int dimensionLength); + INLINEDEF void init(Nd4jLong const* shapeInfo,int const* dimension,int dimensionLength); #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF void init(int index, Nd4jLong *shapeInfo,int *dimension,int dimensionLength); + INLINEDEF void init(int index, Nd4jLong const* shapeInfo,int const* dimension,int dimensionLength); @@ -134,12 +134,12 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF void permuteShapeBufferInPlace(Nd4jLong *shapeBuffer, int* rearrange, Nd4jLong *out); + INLINEDEF void permuteShapeBufferInPlace(Nd4jLong const* shapeBuffer, int const* rearrange, Nd4jLong *out); #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong* permuteShapeBuffer(Nd4jLong *shapeBuffer, int *rearrange); + INLINEDEF Nd4jLong* permuteShapeBuffer(Nd4jLong const* shapeBuffer, int *rearrange); @@ -153,7 +153,7 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong lengthPerSlice(Nd4jLong *shapeBuffer); + INLINEDEF Nd4jLong lengthPerSlice(Nd4jLong const* shapeBuffer); #ifdef __CUDACC__ @@ -253,7 +253,7 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength); + INLINEDEF Nd4jLong tadLength(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength); /** * Computes the number @@ -263,7 +263,7 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong tensorsAlongDimension(Nd4jLong *shapeInfo, int *dimension, int dimensionLength); + INLINEDEF Nd4jLong tensorsAlongDimension(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength); #ifdef __CUDACC__ @@ -337,19 +337,19 @@ namespace shape { this->wholeThing = this->numTads == 1 || ((this->dimensionLength == this->rank || this->numTads == shape::length(this->shapeInfo)) && ews == 1); } - INLINEDEF void TAD::init(int 
tadIndex, Nd4jLong *shapeInfo,int *dimension,int dimensionLength) { + INLINEDEF void TAD::init(int tadIndex, Nd4jLong const* shapeInfo,int const* dimension,int dimensionLength) { this->tadIndex = tadIndex; this->init(shapeInfo, dimension, dimensionLength); } - INLINEDEF void TAD::init(Nd4jLong *shapeInfo, int *dimension,int dimensionLength) { + INLINEDEF void TAD::init(Nd4jLong const* shapeInfo, int const* dimension,int dimensionLength) { this->originalShapeInfo = shapeInfo; this->originalDimension = dimension; this->originalDimensionLength = dimensionLength; //start off as original references this->shapeInfo = shapeInfo; this->dimensionLength = dimensionLength; - this->dimension = dimension; + this->dimension = const_cast(dimension); this->rank = shape::rank(shapeInfo); this->numTads = dimensionLength == 0 ? 1 : this->tensorsAlongDimension(this->shapeInfo, this->dimension, this->dimensionLength); @@ -420,19 +420,19 @@ namespace shape { } - INLINEDEF void TAD::permuteShapeBufferInPlace(Nd4jLong* shapeBuffer, int* rearrange, Nd4jLong* out) { + INLINEDEF void TAD::permuteShapeBufferInPlace(Nd4jLong const* shapeBuffer, int const* rearrange, Nd4jLong* out) { memcpy(out, shapeBuffer, sizeof(Nd4jLong) * shape::shapeInfoLength(this->rank)); doPermuteShapeInfo(out, rearrange); } - INLINEDEF Nd4jLong* TAD::permuteShapeBuffer(Nd4jLong* shapeBuffer, int *rearrange) { + INLINEDEF Nd4jLong* TAD::permuteShapeBuffer(Nd4jLong const* shapeBuffer, int *rearrange) { int len = shape::shapeInfoLength(this->rank); Nd4jLong *copy = shape::copyOf(len,shapeBuffer); doPermuteShapeInfo(copy,rearrange); return copy; } - INLINEDEF bool TAD::dimensionsDescending(int rank, int *dimensions, int length) { + INLINEDEF bool TAD::dimensionsDescending(int rank, int const* dimensions, int length) { int desired = rank - 1; for (int e = length - 1; e >= 0; e--) { if (dimensions[e] != desired--) @@ -465,7 +465,7 @@ namespace shape { this->tadStride = shape::stride(this->tadOnlyShapeInfo); } - INLINEDEF 
Nd4jLong TAD::lengthPerSlice(Nd4jLong* shapeBuffer) { + INLINEDEF Nd4jLong TAD::lengthPerSlice(Nd4jLong const* shapeBuffer) { int dimension = 0; Nd4jLong *remove = shape::removeIndex(shape::shapeOf(shapeBuffer),&dimension,shape::rank(shapeBuffer),1); Nd4jLong prod = shape::prodLong(remove, shape::rank(shapeBuffer) - 1); @@ -635,7 +635,7 @@ namespace shape { } - INLINEDEF Nd4jLong* TAD::tensorShape() { + INLINEDEF Nd4jLong* TAD::tensorShape(){ if(this->tadShape != nullptr) return this->tadShape; @@ -902,7 +902,7 @@ namespace shape { } - INLINEDEF Nd4jLong TAD::tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { + INLINEDEF Nd4jLong TAD::tadLength(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength) { if(dimensionLength == 1) { return shape::shapeOf(shapeInfo)[dimension[0]]; } @@ -919,7 +919,7 @@ namespace shape { } - INLINEDEF Nd4jLong TAD::tensorsAlongDimension(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { + INLINEDEF Nd4jLong TAD::tensorsAlongDimension(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength) { return shape::length(shapeInfo) / this->tadLength(shapeInfo,dimension,dimensionLength); } diff --git a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp index a69614906..fc8abe8aa 100644 --- a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp @@ -55,22 +55,16 @@ namespace sd { ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { int deviceId = 0; - _mutex.lock(); + std::lock_guard lock(_mutex); if (_cache[deviceId].count(descriptor) == 0) { auto hPtr = descriptor.toShapeInfo(); ConstantDataBuffer buffer(hPtr, nullptr, shape::shapeInfoLength(hPtr)*sizeof(Nd4jLong), DataType::INT64); ShapeDescriptor descriptor1(descriptor); _cache[deviceId][descriptor1] = buffer; - auto r = _cache[deviceId][descriptor1]; - _mutex.unlock(); - - return r; + 
return _cache[deviceId][descriptor1]; } else { - auto r = _cache[deviceId].at(descriptor); - _mutex.unlock(); - - return r; + return _cache[deviceId].at(descriptor); } } @@ -82,52 +76,45 @@ namespace sd { bool ConstantShapeHelper::checkBufferExistenceForShapeInfo(ShapeDescriptor &descriptor) { bool result; int deviceId = 0; - _mutex.lock(); + std::lock_guard lock(_mutex); - if (_cache[deviceId].count(descriptor) == 0) - result = false; - else - result = true; - - _mutex.unlock(); - - return result; + return _cache[deviceId].count(descriptor) != 0; } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast(shapeInfo))); } - Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { + const Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { + const Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { + const Nd4jLong* 
ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { + const Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -137,7 +124,7 @@ namespace sd { return result; } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { + const Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -148,7 +135,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector dimensions) { +ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector &dimensions) { Nd4jLong* newShapeInfo = 
nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); diff --git a/libnd4j/include/helpers/cpu/MmulHelper.cpp b/libnd4j/include/helpers/cpu/MmulHelper.cpp index 73f3e54bd..26a6643c3 100644 --- a/libnd4j/include/helpers/cpu/MmulHelper.cpp +++ b/libnd4j/include/helpers/cpu/MmulHelper.cpp @@ -44,9 +44,9 @@ static void usualGemm(const NDArray* vA, const NDArray* vB, NDArray* vC, const bool betaPersent = beta; - const Nd4jLong* aShapeInfo = vA->getShapeInfo(); - const Nd4jLong* bShapeInfo = vB->getShapeInfo(); - const Nd4jLong* cShapeInfo = vC->getShapeInfo(); + const Nd4jLong* aShapeInfo = vA->shapeInfo(); + const Nd4jLong* bShapeInfo = vB->shapeInfo(); + const Nd4jLong* cShapeInfo = vC->shapeInfo(); const int aRank = vA->rankOf(); const int bRank = vB->rankOf(); @@ -111,9 +111,9 @@ static void usualGemv(const NDArray* vA, const NDArray* vX, NDArray* vY, const const bool betaPersent = beta; - const Nd4jLong* aShapeInfo = vA->getShapeInfo(); - const Nd4jLong* xShapeInfo = vX->getShapeInfo(); - const Nd4jLong* yShapeInfo = vY->getShapeInfo(); + const Nd4jLong* aShapeInfo = vA->shapeInfo(); + const Nd4jLong* xShapeInfo = vX->shapeInfo(); + const Nd4jLong* yShapeInfo = vY->shapeInfo(); const int N = vX->lengthOf(); const int M = vY->lengthOf(); @@ -294,13 +294,13 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, if(A->rankOf() != 2) throw std::runtime_error("MmulHelper::mmulMxV: rank of A array is not equal 2 !"); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::mmulMxV: X array must be vector !"); const auto M = A->sizeAt(0); const auto N = A->sizeAt(1); - if(Y != nullptr && !shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(Y != nullptr && !shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::mmulMxV: Y array must be vector !"); if(X->lengthOf() != N) 
throw std::runtime_error("MmulHelper::mmulMxV: X vector has wrong length !"); @@ -347,10 +347,10 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, // choose appropriate cuda gemm api depending on data types if(typeDouble) { - BlasHelper::getInstance()->dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->getBuffer(), lda, (double*)X->getBuffer(), incx, beta, (double*)Y->getBuffer(), incy); + BlasHelper::getInstance()->dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->buffer(), lda, (double*)X->buffer(), incx, beta, (double*)Y->buffer(), incy); } else if(typeFloat) { - BlasHelper::getInstance()->sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->getBuffer(), lda, (float*)X->getBuffer(), incx, (float)beta, (float*)Y->getBuffer(), incy); + BlasHelper::getInstance()->sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->buffer(), lda, (float*)X->buffer(), incx, (float)beta, (float*)Y->buffer(), incy); } if(pA != A) @@ -371,9 +371,9 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con int xLenDim(0), yLenDim(0); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::dot: X array must be vector !"); - if(!shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(!shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::dot: Y array must be vector !"); if(Z != nullptr && !Z->isScalar()) throw std::runtime_error("MmulHelper::dot: Z array must be scalar !"); @@ -393,8 +393,8 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con const auto yType = Y->dataType(); const auto zType = Z->dataType(); - BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (length, alpha, X->getBuffer(), incx, Y->getBuffer(), incy, beta, Z->getBuffer()), NUMERIC_TYPES); - //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (length, alpha, 
X->getBuffer(), incx, Y->getBuffer(), incy, beta, Z->getBuffer()), LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (length, alpha, X->buffer(), incx, Y->buffer(), incy, beta, Z->buffer()), NUMERIC_TYPES); + //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (length, alpha, X->buffer(), incx, Y->buffer(), incy, beta, Z->buffer()), LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); return Z; } @@ -419,9 +419,9 @@ static void batchedGemm(const NDArray* vA, const NDArray* vB, NDArray* vC, const bool betaPersent = beta; - const Nd4jLong* aShapeInfo = vA->getShapeInfo(); - const Nd4jLong* bShapeInfo = vB->getShapeInfo(); - const Nd4jLong* cShapeInfo = vC->getShapeInfo(); + const Nd4jLong* aShapeInfo = vA->shapeInfo(); + const Nd4jLong* bShapeInfo = vB->shapeInfo(); + const Nd4jLong* cShapeInfo = vC->shapeInfo(); const int aRank = vA->rankOf(); const int bRank = vB->rankOf(); @@ -576,13 +576,13 @@ NDArray* MmulHelper::mmulNxN(const NDArray* A, const NDArray* B, NDArray* C, con // multiplication const std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(C->rankOf(), {-2, -1}); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->shapeInfo(), dimsToExclude); std::vector idxRanges(2 * C->rankOf()); // #pragma omp parallel for schedule(guided) firstprivate(idxRanges) for(Nd4jLong i = 0; i < numOfSubArrs; ++i) { - ShapeUtils::evalIdxRangesForSubArr(i, C->getShapeInfo(), dimsToExclude, idxRanges.data()); + ShapeUtils::evalIdxRangesForSubArr(i, C->shapeInfo(), dimsToExclude, idxRanges.data()); NDArray cSubArr = (*C)(idxRanges); if(aRank > bRank) { diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp index fe6019b5a..a64f0fc91 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp 
@@ -26,10 +26,10 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////////// template template -void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, - X* extraParams) { +void sd::IndexReductionLoops::loopIndexReduce(const X* x, const Nd4jLong* xShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, + X* extraParams) { sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(xShapeInfo, zShapeInfo, tadShapeInfo); if(kindOfLoop == sd::LoopKind::SMALLARR2DX) @@ -305,8 +305,8 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } template -void sd::IndexReductionLoops::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams) { - auto x = reinterpret_cast(vx); +void sd::IndexReductionLoops::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp index 68ae29fc9..97318dae8 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT32, 
int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp index fe68715ca..680bf7a64 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp index 8627003fd..e22635b85 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* 
vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp index 8b2f4e1a7..f85096f0a 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp index e87921565..5272eba7e 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* 
tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp index 062b006fd..683d6d0c0 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp index 4182de6fd..0ff70b7b5 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, 
void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp index 53a4ed23f..64d93c5e3 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp index 2cf4b6ae7..dd586ab26 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const 
int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp index b6b1da4a0..bb7ef80f7 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp index de4cf1872..8d0c55ce1 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void 
sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp index 71a19bab2..7c5824559 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp index 22d430e9e..3bb6e6b7c 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp @@ -21,4 +21,4 @@ #include 
"./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp index c2434f63a..49f977901 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp index be628bb63..73f0e9872 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp +++ 
b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp index a5e8a596f..b27aaf341 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp index 3e96a0574..452184acd 100644 --- 
a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp index a6c02301f..59cbc51cf 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp 
b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp index e461c9bcd..51fc49cea 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp index 33e5ba403..b774dde52 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git 
a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp index f721c5994..00b15673b 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp @@ -28,7 +28,7 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const 
int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp index 19a248896..da8d3db7e 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp @@ -28,7 +28,7 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS 
Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, 
const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp index e90050e4e..06588a2fb 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp @@ -28,7 +28,7 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t 
stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp index d109d1013..405b0275b 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp @@ -28,7 +28,7 @@ namespace sd { template 
template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, 
dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp index 31ec60d93..e122717fc 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp @@ -26,17 +26,18 @@ namespace sd { template template - void ReductionBoolLoops::innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { + void ReductionBoolLoops::innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionBoolLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - X *extraParams, 
int64_t start, int64_t stop) { + void ReductionBoolLoops::wrapper(const int opNum, + const X *x, const Nd4jLong *xShapeInfo, + Y *z, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + X *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_BOOL_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp index f4243d1c9..a3879bee3 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp @@ -28,16 +28,18 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, + Y *z, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + Y *extraParams, + int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif 
diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp index 1c5b46d40..6dd555037 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp @@ -28,16 +28,19 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, + const X *x, const Nd4jLong *xShapeInfo, + Y *z, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + Y *extraParams, + int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp index 08ca08cdb..ce1042b88 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp @@ -28,16 +28,16 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, 
Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, Y *z, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp index 7735c2125..6cfac93bc 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp @@ -28,16 +28,16 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, 
tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, Y *z, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp index e4f4ab2e0..be6cb28bd 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp @@ -33,16 +33,16 @@ namespace sd { template template - void ReductionLongLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z *z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { + void ReductionLongLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z *z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionLongLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, X *extraParams, int64_t start, int64_t stop) { + void ReductionLongLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, Y *z, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, X 
*extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_LONG_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp index 6188a90f5..53725de83 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp @@ -26,16 +26,16 @@ namespace sd { template template - void ReductionSameLoops::innerloopReduce(X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { + void ReductionSameLoops::innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, X* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionSameLoops::wrapper(const int opNum, X *vx, Nd4jLong *xShapeInfo, X *vz, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + void ReductionSameLoops::wrapper(const int opNum, const X *vx, const Nd4jLong *xShapeInfo, X *vz, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, X *vextraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS auto x = reinterpret_cast(vx); diff --git a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu index ebce6aac5..2026dbb04 100644 --- a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu @@ -83,40 +83,40 @@ namespace sd { return _cache[deviceId].count(descriptor) != 0; } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const 
sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast(shapeInfo))); } - Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { + Nd4jLong const* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { + Nd4jLong const* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { + Nd4jLong const* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* 
ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { + Nd4jLong const* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -126,7 +126,7 @@ namespace sd { return result; } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { + Nd4jLong const* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -136,7 +136,7 @@ namespace sd { } //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector dimensions) { +ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector& dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); diff --git a/libnd4j/include/helpers/cuda_off/MmulHelper.cu b/libnd4j/include/helpers/cuda_off/MmulHelper.cu index fd1cd5813..0a3b466bc 100644 --- a/libnd4j/include/helpers/cuda_off/MmulHelper.cu +++ b/libnd4j/include/helpers/cuda_off/MmulHelper.cu @@ -268,8 +268,8 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou const int sharedMem = threadsPerBlock * sizeof(int) * 6 + 128; // 6 = aRank + bRank + cRank NDArray::prepareSpecialUse({C}, {A, B}); - 
// BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(aType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES) + // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(aType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); auto cudaResult = cudaStreamSynchronize(*stream); @@ -319,23 +319,23 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou // choose appropriate cuda gemm api depending on data types if(typeDouble) { - status = cublasDgemm(*handle, transAblas, transBblas, M, N, K, &alpha, (double*)pA->getSpecialBuffer(), lda, (double*)pB->getSpecialBuffer(), ldb, &beta, (double*)pC->getSpecialBuffer(), ldc); + status = cublasDgemm(*handle, transAblas, transBblas, M, N, K, &alpha, (double*)pA->specialBuffer(), lda, (double*)pB->specialBuffer(), ldb, &beta, (double*)pC->specialBuffer(), ldc); } else if(typeFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemm(*handle, transAblas, transBblas, M, N, K, 
&alphaF, (float*)pA->getSpecialBuffer(), lda, (float*)pB->getSpecialBuffer(), ldb, &betaF, (float*)pC->getSpecialBuffer(), ldc); + status = cublasSgemm(*handle, transAblas, transBblas, M, N, K, &alphaF, (float*)pA->specialBuffer(), lda, (float*)pB->specialBuffer(), ldb, &betaF, (float*)pC->specialBuffer(), ldc); } else if(typeHalf) { float16 alphaH(alpha), betaH(beta); - status = cublasHgemm(*handle, transAblas, transBblas, M, N, K, &alphaH.data, (__half*)pA->getSpecialBuffer(), lda, (__half*)pB->getSpecialBuffer(), ldb, &betaH.data, (__half*)pC->getSpecialBuffer(), ldc); + status = cublasHgemm(*handle, transAblas, transBblas, M, N, K, &alphaH.data, (__half*)pA->specialBuffer(), lda, (__half*)pB->specialBuffer(), ldb, &betaH.data, (__half*)pC->specialBuffer(), ldc); } else if(typeIntFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_8I, lda, pB->getSpecialBuffer(), CUDA_R_8I, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); + status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->specialBuffer(), CUDA_R_8I, lda, pB->specialBuffer(), CUDA_R_8I, ldb, &betaF, pC->specialBuffer(), CUDA_R_32F, ldc); } else if(typeHalfFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_16F, lda, pB->getSpecialBuffer(), CUDA_R_16F, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); + status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->specialBuffer(), CUDA_R_16F, lda, pB->specialBuffer(), CUDA_R_16F, ldb, &betaF, pC->specialBuffer(), CUDA_R_32F, ldc); } if (status != CUBLAS_STATUS_SUCCESS) @@ -365,13 +365,13 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, if(A->rankOf() != 2) throw std::runtime_error("MmulHelper::mmulMxV cuda: rank of A array is not equal 2 !"); - 
if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::mmulMxV cuda: X array must be vector !"); const auto M = A->sizeAt(0); const auto N = A->sizeAt(1); - if(Y != nullptr && !shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(Y != nullptr && !shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::mmulMxV cuda: Y array must be vector !"); if(X->lengthOf() != N) throw std::runtime_error("MmulHelper::mmulMxV cuda: X vector has wrong length !"); @@ -411,8 +411,8 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, const int blocksPerGrid = (M + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({Y}, {A, X}); - // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), X->getSpecialBuffer(), X->getSpecialShapeInfo(), Y->getSpecialBuffer(), Y->getSpecialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(xType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), X->getSpecialBuffer(), X->getSpecialShapeInfo(), Y->getSpecialBuffer(), Y->getSpecialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES) + // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({Y}, {A, X}); 
auto cudaResult = cudaStreamSynchronize(*stream); @@ -442,11 +442,11 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, // choose appropriate cuda gemm api depending on data types if(typeDouble) { - status = cublasDgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alpha, (double*)pA->getSpecialBuffer(), lda, (double*)X->getSpecialBuffer(), incx, &beta, (double*)Y->getSpecialBuffer(), incy); + status = cublasDgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alpha, (double*)pA->specialBuffer(), lda, (double*)X->specialBuffer(), incx, &beta, (double*)Y->specialBuffer(), incy); } else if(typeFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alphaF, (float*)pA->getSpecialBuffer(), lda, (float*)X->getSpecialBuffer(), incx, &betaF, (float*)Y->getSpecialBuffer(), incy); + status = cublasSgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alphaF, (float*)pA->specialBuffer(), lda, (float*)X->specialBuffer(), incx, &betaF, (float*)Y->specialBuffer(), incy); } if (status != CUBLAS_STATUS_SUCCESS) @@ -471,9 +471,9 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con int xLenDim(0), yLenDim(0); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::dot cuda: X array must be vector !"); - if(!shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(!shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::dot cuda: Y array must be vector !"); if(Z != nullptr && !Z->isScalar()) throw std::runtime_error("MmulHelper::dot cuda: Z array must be scalar !"); @@ -506,8 +506,8 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con NDArray::prepareSpecialUse({Z}, {X, Y}); - //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, 
X->getSpecialBuffer(), incx, Y->getSpecialBuffer(), incy, beta, Z->getSpecialBuffer()), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->getSpecialBuffer(), incx, Y->getSpecialBuffer(), incy, beta, Z->getSpecialBuffer()), NUMERIC_TYPES) + //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->specialBuffer(), incx, Y->specialBuffer(), incy, beta, Z->specialBuffer()), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->specialBuffer(), incx, Y->specialBuffer(), incy, beta, Z->specialBuffer()), NUMERIC_TYPES) auto cudaResult = cudaStreamSynchronize(*stream); if (cudaResult != 0) throw cuda_exception::build("MmulHelper::dot cuda failed !", cudaResult); @@ -667,8 +667,8 @@ NDArray* MmulHelper::mmulNxN(const NDArray* A, const NDArray* B, NDArray* C, con cBatchDims = reinterpret_cast(manager.replicatePointer(ShapeUtils::evalDimsToExclude(cRank, {cMaxis, cNaxis}).data(), (cRank - 2) * sizeof(int))); NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(A->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, sharedMem, A->getContext()->getCudaStream(), A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), aBatchDims, bBatchDims, cBatchDims, aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), 
NUMERIC_TYPES) + // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(A->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, sharedMem, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aBatchDims, bBatchDims, cBatchDims, aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); manager.synchronize(); @@ -797,13 +797,13 @@ NDArray* MmulHelper::mmulNxNold1(const NDArray* A, const NDArray* B, NDArray* C, // multiplication const std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(C->rankOf(), {-2, -1}); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->shapeInfo(), dimsToExclude); std::vector idxRanges(2 * C->rankOf()); // #pragma omp parallel for schedule(guided) firstprivate(idxRanges) for(Nd4jLong i = 0; i < numOfSubArrs; ++i) { - ShapeUtils::evalIdxRangesForSubArr(i, C->getShapeInfo(), dimsToExclude, idxRanges.data()); + ShapeUtils::evalIdxRangesForSubArr(i, C->shapeInfo(), dimsToExclude, idxRanges.data()); NDArray cSubArr = (*C)(idxRanges); if(aRank > bRank) { @@ -944,18 +944,18 @@ NDArray* MmulHelper::mmulNxNold2(const NDArray* A, const NDArray* B, NDArray* C, std::vector aSubArrs(bS), bSubArrs(bS), cSubArrs(bS); if(aRank > 2) - shape::calcSubArrsShapeInfoAndOffsets(pA->getShapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); + 
shape::calcSubArrsShapeInfoAndOffsets(pA->shapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); for (int i = 0; i < bS; ++i) - aSubArrs[i] = aRank == 2 ? pA->getSpecialBuffer() : pA->getSpecialBuffer() + subArrOffsets[i] * pA->sizeOfT(); + aSubArrs[i] = aRank == 2 ? pA->specialBuffer() : pA->specialBuffer() + subArrOffsets[i] * pA->sizeOfT(); if(bRank > 2) - shape::calcSubArrsShapeInfoAndOffsets(pB->getShapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); + shape::calcSubArrsShapeInfoAndOffsets(pB->shapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); for (int i = 0; i < bS; ++i) - bSubArrs[i] = bRank == 2 ? pB->getSpecialBuffer() : pB->getSpecialBuffer() + subArrOffsets[i] * pB->sizeOfT(); + bSubArrs[i] = bRank == 2 ? pB->specialBuffer() : pB->specialBuffer() + subArrOffsets[i] * pB->sizeOfT(); - shape::calcSubArrsShapeInfoAndOffsets(pC->getShapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); + shape::calcSubArrsShapeInfoAndOffsets(pC->shapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); for (int i = 0; i < bS; ++i) - cSubArrs[i] = pC->getSpecialBuffer() + subArrOffsets[i] * pC->sizeOfT(); + cSubArrs[i] = pC->specialBuffer() + subArrOffsets[i] * pC->sizeOfT(); PointersManager manager(A->getContext(), "mmulNxN"); @@ -1011,7 +1011,7 @@ NDArray* MmulHelper::mmulNxNold2(const NDArray* A, const NDArray* B, NDArray* C, for(Nd4jLong i = 0; i < bS; ++i) { - ShapeUtils::evalIdxRangesForSubArr(i, pC->getShapeInfo(), dimsToExclude, idxRanges.data()); + ShapeUtils::evalIdxRangesForSubArr(i, pC->shapeInfo(), dimsToExclude, idxRanges.data()); NDArray cSubArr = (*pC)(idxRanges); if(aRank > bRank) { diff --git a/libnd4j/include/helpers/impl/MmulHelper.cpp b/libnd4j/include/helpers/impl/MmulHelper.cpp index 
f5b9bc829..8e37fd530 100644 --- a/libnd4j/include/helpers/impl/MmulHelper.cpp +++ b/libnd4j/include/helpers/impl/MmulHelper.cpp @@ -91,7 +91,7 @@ void sd::MmulHelper::tensorDot(const sd::NDArray* a, const sd::NDArray* b, sd::N mmul(aPR, bPR, cPR, 1.0, 0.0); - if(cPR->getBuffer() != cP->getBuffer() || cPR->getSpecialBuffer() != cP->getSpecialBuffer() ) // this means both permute and reshape have been performed on c, cP always points on c->getBuffer() + if(cPR->buffer() != cP->buffer() || cPR->specialBuffer() != cP->specialBuffer() ) // this means both permute and reshape have been performed on c, cP always points on c->buffer() cP->assign(cPR); if(aP != aPR) @@ -150,7 +150,7 @@ void sd::MmulHelper::tensorDot(const NDArray* a, const NDArray* b, NDArray* c, c // check whether new buffer allocation was happened for c array if(!whatToDoWithC.empty()) { for(int i = cArrs.size()-1; i > 0; --i) { - if(cArrs[i]->getBuffer() != cArrs[i-1]->getBuffer() || cArrs[i]->getSpecialBuffer() != cArrs[i-1]->getSpecialBuffer()) + if(cArrs[i]->buffer() != cArrs[i-1]->buffer() || cArrs[i]->specialBuffer() != cArrs[i-1]->specialBuffer()) cArrs[i-1]->assign(cArrs[i]); delete cArrs[i]; } @@ -203,8 +203,8 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND int lenDim; const int aRank = A->rankOf(); const int bRank = B->rankOf(); - const bool isAVector = shape::isCommonVector(A->getShapeInfo(), lenDim); - const bool isBVector = shape::isCommonVector(B->getShapeInfo(), lenDim); + const bool isAVector = shape::isCommonVector(A->shapeInfo(), lenDim); + const bool isBVector = shape::isCommonVector(B->shapeInfo(), lenDim); // dot product of 2 vectors if(isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1*4) or (4x1 * 4x1) or (4x1 * 4) @@ -243,7 +243,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND int xRank = x->rankOf(); int yRank = 
y->rankOf(); - auto outShape = ShapeUtils::evalShapeForMatmul(x->getShapeInfo(), y->getShapeInfo(), transX, transY); + auto outShape = ShapeUtils::evalShapeForMatmul(x->shapeInfo(), y->shapeInfo(), transX, transY); if(!z->isSameShape(outShape)) { nd4j_printf("NDArrayFactory::matmul static method: input shape of output array is wrong, actual is %s and expected is %s ! \n", ShapeUtils::shapeAsString(z).c_str(), ShapeUtils::shapeAsString(outShape).c_str()); throw std::invalid_argument(""); @@ -285,7 +285,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND for(int i = 0; i < batchRank; ++i) dimsToExclude[i] = i; - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(xT->getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(xT->shapeInfo(), dimsToExclude); //PRAGMA_OMP_PARALLEL_FOR for(Nd4jLong i = 0; i < numOfSubArrs; ++i) { diff --git a/libnd4j/include/helpers/impl/ShapeUtils.cpp b/libnd4j/include/helpers/impl/ShapeUtils.cpp index aa8e917cc..c327004bd 100644 --- a/libnd4j/include/helpers/impl/ShapeUtils.cpp +++ b/libnd4j/include/helpers/impl/ShapeUtils.cpp @@ -118,13 +118,13 @@ std::vector ShapeUtils::evalShapeForTensorDot(const Nd4jLong* aShapeIn ////////////////////////////////////////////////////////////////////////// std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, const NDArray* b, const std::vector& axesA, const std::vector& axesB, std::vector& permutAt, std::vector& permutBt, std::vector& shapeAt, std::vector& shapeBt) { - return evalShapeForTensorDot(a->getShapeInfo(), b->getShapeInfo(), axesA, axesB, permutAt, permutBt, shapeAt, shapeBt); + return evalShapeForTensorDot(a->shapeInfo(), b->shapeInfo(), axesA, axesB, permutAt, permutBt, shapeAt, shapeBt); } ////////////////////////////////////////////////////////////////////////// // evaluate output shape for reduce operation when input shape is empty -Nd4jLong* ShapeUtils::evalReduceShapeInfoEmpty(const char order, 
std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::evalReduceShapeInfoEmpty(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace) { if (dimsToExclude.size() == 0) { // return copy of input shape Nd4jLong* outShapeInfo = ShapeBuilders::copyShapeInfoAndType(shapeInfo, dataType, true, workspace); @@ -171,22 +171,22 @@ Nd4jLong* ShapeUtils::evalReduceShapeInfoEmpty(const char order, std::vectorbufferForShapeInfo(descriptor).primaryAsT(); } -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { - return evalReduceShapeInfo(order, dimsToExclude, arr, arr.dataType(), keepDims, supportOldShapes, workspace); -} + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + return evalReduceShapeInfo(order, dimsToExclude, arr, arr.dataType(), keepDims, supportOldShapes, workspace); + } -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong* shapeInfo, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { - return evalReduceShapeInfo(order, dimsToExclude, shapeInfo, ArrayOptions::dataType(shapeInfo), keepDims, supportOldShapes, workspace); -} + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong* shapeInfo, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + return evalReduceShapeInfo(order, dimsToExclude, shapeInfo, ArrayOptions::dataType(shapeInfo), keepDims, supportOldShapes, workspace); + } 
////////////////////////////////////////////////////////////////////////// -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { - return evalReduceShapeInfo(order, dimsToExclude, arr.getShapeInfo(), dataType, keepDims, supportOldShapes, workspace); -} + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + return evalReduceShapeInfo(order, dimsToExclude, arr.shapeInfo(), dataType, keepDims, supportOldShapes, workspace); + } ////////////////////////////////////////////////////////////////////////// // evaluate shape resulting from reduce operation -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { if(ArrayOptions::arrayType(shapeInfo) == ArrayType::EMPTY) return ShapeUtils::evalReduceShapeInfoEmpty(order, dimsToExclude, shapeInfo, dataType, keepDims, workspace); @@ -314,39 +314,39 @@ std::vector ShapeUtils::evalRepeatShape(int axis, const std::vectorbufferForShapeInfo(descriptor).primaryAsT(); -} + return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + } ////////////////////////////////////////////////////////////////////////// // evaluate shapeInfo of permuted array - Nd4jLong* ShapeUtils::evalPermShapeInfo(const Nd4jLong *dimensions, const int rank, const NDArray& arr, 
sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::evalPermShapeInfo(const Nd4jLong *dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace) { std::vector dims(dimensions, dimensions + rank); return evalPermShapeInfo(dims.data(), rank, arr, workspace); @@ -354,7 +354,7 @@ Nd4jLong* ShapeUtils::evalPermShapeInfo(const int* dimensions, const int rank, c ////////////////////////////////////////////////////////////////////////// // evaluate shapeInfo of transposed array - Nd4jLong* ShapeUtils::evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides) { + const Nd4jLong* ShapeUtils::evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides) { int rank = arr.rankOf(); std::vector dimensions(rank); @@ -414,10 +414,10 @@ std::vector ShapeUtils::evalDimsToExclude(const int rank, const std::vector // check whether 2 arrays have mutually broadcastable shapes // shape comparison starts from the end bool ShapeUtils::areShapesBroadcastable(const NDArray &arr1, const NDArray &arr2) { - return areShapesBroadcastable(arr1.getShapeInfo(), arr2.getShapeInfo()); + return areShapesBroadcastable(arr1.shapeInfo(), arr2.shapeInfo()); } -bool ShapeUtils::areShapesBroadcastable(Nd4jLong *shapeInfo1, Nd4jLong *shapeInfo2) { +bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLong *shapeInfo2) { int minRank = shape::rank(shapeInfo1) < shape::rank(shapeInfo2) ? 
shape::rank(shapeInfo1) : shape::rank(shapeInfo2); for (int i = -1; i >= -minRank; --i) @@ -427,177 +427,177 @@ bool ShapeUtils::areShapesBroadcastable(Nd4jLong *shapeInfo1, Nd4jLong *shapeInf return true; } -bool ShapeUtils::areShapesBroadcastable(const std::vector& shape1, const std::vector& shape2) { + bool ShapeUtils::areShapesBroadcastable(const std::vector& shape1, const std::vector& shape2) { - const auto rank1 = shape1.size(); - const auto rank2 = shape2.size(); - const int minRank = rank1 < rank2 ? rank1 : rank2; + const auto rank1 = shape1.size(); + const auto rank2 = shape2.size(); + const int minRank = rank1 < rank2 ? rank1 : rank2; - for (int i = 1; i <= minRank; ++i) - if (shape1[rank1-i] != shape2[rank2-i] && shape1[rank1-i] != 1 && shape2[rank2-i] != 1) + for (int i = 1; i <= minRank; ++i) + if (shape1[rank1-i] != shape2[rank2-i] && shape1[rank1-i] != 1 && shape2[rank2-i] != 1) + return false; + + return true; + } + + ////////////////////////////////////////////////////////////////////////// + // check the possibility of broadcast operation, if true then return shapeInfo of resulting array + // if evalMinMax == false the array with larger rank has to be passed as first argument + bool ShapeUtils::evalBroadcastShapeInfo(const NDArray &max, const NDArray &min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { + return evalBroadcastShapeInfo(max.shapeInfo(), min.shapeInfo(), evalMinMax, resultShapeInfo, workspace); + } + + bool ShapeUtils::evalBroadcastShapeInfo(const Nd4jLong *max, const Nd4jLong *min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { + + // check whether broadcast operation is possible for input arrays + if(!areShapesBroadcastable(max, min)) return false; - return true; -} + auto maxShapeInfo = max; //max.shapeInfo(); + auto minShapeInfo = min; //min.shapeInfo(); -////////////////////////////////////////////////////////////////////////// -// check 
the possibility of broadcast operation, if true then return shapeInfo of resulting array -// if evalMinMax == false the array with larger rank has to be passed as first argument -bool ShapeUtils::evalBroadcastShapeInfo(const NDArray &max, const NDArray &min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { - return evalBroadcastShapeInfo(max.getShapeInfo(), min.getShapeInfo(), evalMinMax, resultShapeInfo, workspace); -} + if(evalMinMax && (shape::rank(max) < shape::rank(min))) { + maxShapeInfo = min; + minShapeInfo = max; + } -bool ShapeUtils::evalBroadcastShapeInfo(Nd4jLong *max, Nd4jLong *min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { + const auto maxRank = shape::rank(maxShapeInfo); + const auto minRank = shape::rank(minShapeInfo); - // check whether broadcast operation is possible for input arrays - if(!areShapesBroadcastable(max, min)) - return false; + // evaluate shapeInfo for resulting array + if(resultShapeInfo != nullptr) + throw std::runtime_error("std::runtime_error(ShapeUtils::evalBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); - auto maxShapeInfo = max; //max.getShapeInfo(); - auto minShapeInfo = min; //min.getShapeInfo(); + Nd4jLong *tmpShapeInfo = nullptr; + ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); - if(evalMinMax && (shape::rank(max) < shape::rank(min))) { - maxShapeInfo = min; - minShapeInfo = max; + // FIXME: get rid of memcpy here + memcpy(tmpShapeInfo, maxShapeInfo, shape::shapeInfoByteLength(maxRank)); + for (int i = 0; i < minRank; ++i) + if((maxShapeInfo[maxRank-i] != 0 && maxShapeInfo[maxRank-i] < minShapeInfo[minRank-i]) || minShapeInfo[minRank-i] == 0) + tmpShapeInfo[maxRank - i] = minShapeInfo[minRank-i]; + + ShapeUtils::updateStridesAndType(tmpShapeInfo, DataTypeUtils::pickPairwiseResultType(maxShapeInfo, minShapeInfo), shape::order(maxShapeInfo)); + + if (shape::isEmpty(max) || 
shape::isEmpty(min)) { + ArrayOptions::setPropertyBit(tmpShapeInfo, ARRAY_EMPTY); + memset(shape::stride(tmpShapeInfo), 0, shape::rank(tmpShapeInfo) * sizeof(Nd4jLong)); + } + + ShapeDescriptor descriptor(tmpShapeInfo); + RELEASE(tmpShapeInfo, workspace); + resultShapeInfo = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + + return true; } - const auto maxRank = shape::rank(maxShapeInfo); - const auto minRank = shape::rank(minShapeInfo); + ////////////////////////////////////////////////////////////////////////// + // check the possibility of broadcast operation for set of arrays, if true then return resulting broadcasted shapeInfo + bool ShapeUtils::evalCommonBroadcastShapeInfo(const std::vector& arrays, Nd4jLong*& resultShapeInfo, memory::Workspace* workspace) { - // evaluate shapeInfo for resulting array - if(resultShapeInfo != nullptr) - throw std::runtime_error("std::runtime_error(ShapeUtils::evalBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); + if(resultShapeInfo != nullptr) + throw std::runtime_error("ShapeUtils::evalCommonBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); - Nd4jLong *tmpShapeInfo = nullptr; - ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); + int size = arrays.size(); + int maxRank = arrays[size - 1]->rankOf(); - // FIXME: get rid of memcpy here - memcpy(tmpShapeInfo, maxShapeInfo, shape::shapeInfoByteLength(maxRank)); - for (int i = 0; i < minRank; ++i) - if((maxShapeInfo[maxRank-i] != 0 && maxShapeInfo[maxRank-i] < minShapeInfo[minRank-i]) || minShapeInfo[minRank-i] == 0) - tmpShapeInfo[maxRank - i] = minShapeInfo[minRank-i]; + for(int i = 0; i < size - 1; ++i) { + if(arrays[i]->rankOf() > maxRank) + maxRank = arrays[i]->rankOf(); + for(int j = i + 1; j < size; ++j) + if(!areShapesBroadcastable(*arrays[i], *arrays[j])) + return false; + } - ShapeUtils::updateStridesAndType(tmpShapeInfo, 
DataTypeUtils::pickPairwiseResultType(maxShapeInfo, minShapeInfo), shape::order(maxShapeInfo)); + Nd4jLong *tmpShapeInfo = nullptr; + ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); + memset(tmpShapeInfo, 0, shape::shapeInfoByteLength(maxRank)); + tmpShapeInfo[0] = maxRank; - if (shape::isEmpty(max) || shape::isEmpty(min)) { - ArrayOptions::setPropertyBit(tmpShapeInfo, ARRAY_EMPTY); - memset(shape::stride(tmpShapeInfo), 0, shape::rank(tmpShapeInfo) * sizeof(Nd4jLong)); + for(const auto& item : arrays ) { + for(int i = -1; i >= -item->rankOf(); --i) + if(tmpShapeInfo[i + 1 + maxRank] < item->sizeAt(i)) + tmpShapeInfo[i + 1 + maxRank] = item->sizeAt(i); + } + + shape::updateStrides(tmpShapeInfo, arrays[0]->ordering()); + ArrayOptions::setDataType(tmpShapeInfo, arrays[0]->dataType()); + + ShapeDescriptor descriptor(tmpShapeInfo); + RELEASE(tmpShapeInfo, workspace); + resultShapeInfo = const_cast(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor)); + + return true; } - ShapeDescriptor descriptor(tmpShapeInfo); - RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); - return true; -} + ////////////////////////////////////////////////////////////////////////// + // return sorted vector of dimensions common (same) for two arrays, dimensions values corresponds to array with bigger rank + // for example if arr1{2,7}, arr2{2,5,4,7} then vector = {0,3} + std::vector ShapeUtils::getDimsWithSameShape(const NDArray& arr1, const NDArray& arr2) { -////////////////////////////////////////////////////////////////////////// -// check the possibility of broadcast operation for set of arrays, if true then return resulting broadcasted shapeInfo -bool ShapeUtils::evalCommonBroadcastShapeInfo(const std::vector& arrays, Nd4jLong*& resultShapeInfo, memory::Workspace* workspace) { + const NDArray *min, *max; - if(resultShapeInfo != nullptr) - throw 
std::runtime_error("ShapeUtils::evalCommonBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); + if(arr1.rankOf() >= arr2.rankOf()) { + max = &arr1; + min = &arr2; + } + else { + max = &arr2; + min = &arr1; + } - int size = arrays.size(); - int maxRank = arrays[size - 1]->rankOf(); + const int rankDiff = max->rankOf() - min->rankOf(); - for(int i = 0; i < size - 1; ++i) { - if(arrays[i]->rankOf() > maxRank) - maxRank = arrays[i]->rankOf(); - for(int j = i + 1; j < size; ++j) - if(!areShapesBroadcastable(*arrays[i], *arrays[j])) - return false; + std::vector dims; + + for (int i = 0; i < min->rankOf(); ++i) + if (min->sizeAt(i) == max->sizeAt(rankDiff + i)) + dims.emplace_back(rankDiff + i); + + return dims; } - Nd4jLong *tmpShapeInfo = nullptr; - ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); - memset(tmpShapeInfo, 0, shape::shapeInfoByteLength(maxRank)); - tmpShapeInfo[0] = maxRank; + ////////////////////////////////////////////////////////////////////////// + // evaluate shapeInfo for resulting array from tile operation + const Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace) { + // check whether reps contains at least one zero (then throw exception) or whether all elements in reps are unities (then simply reshape or do nothing) + int repsSize = reps.size(); + Nd4jLong product = 1; + for(const auto& item : reps) + product *= item; + if(product == 0) + throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !"); - for(const auto& item : arrays ) { - for(int i = -1; i >= -item->rankOf(); --i) - if(tmpShapeInfo[i + 1 + maxRank] < item->sizeAt(i)) - tmpShapeInfo[i + 1 + maxRank] = item->sizeAt(i); + int rankOld = arr.rankOf(); + int diff = rankOld - repsSize; + + // evaluate new shapeInfo + Nd4jLong* newShapeInfo = nullptr; + if(diff < 0) { + ALLOCATE(newShapeInfo, workspace, 
shape::shapeInfoLength(repsSize), Nd4jLong); + newShapeInfo[0] = repsSize; // set new rank + for(int i=1; i <= -diff; ++i) + newShapeInfo[i] = 1; // set unities to be new dimensions at left-hand side of newShapeInfo shape place + memcpy(newShapeInfo + 1 - diff, arr.shapeInfo() + 1, rankOld*sizeof(Nd4jLong)); // copy old dimensions to the right-hand side of newShapeInfo shape place + for(int i=1; i <= repsSize; ++i) + newShapeInfo[i] *= reps[i - 1]; // set new shape by multiplying old dimensions by corresponding numbers from reps + } + else { + ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(rankOld), Nd4jLong); + memcpy(newShapeInfo, arr.shapeInfo(), shape::shapeInfoByteLength(rankOld)); // copy all elements of _shapeInfo to newShapeInfo + for(int i=1; i <= repsSize; ++i) + newShapeInfo[rankOld + 1 - i] *= reps[repsSize - i]; // set new shape by multiplying old dimensions by corresponding numbers from reps + } + shape::updateStrides(newShapeInfo, arr.ordering()); + ArrayOptions::setDataType(newShapeInfo, arr.dataType()); + + ShapeDescriptor descriptor(newShapeInfo); + RELEASE(newShapeInfo, workspace); + return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); } - shape::updateStrides(tmpShapeInfo, arrays[0]->ordering()); - ArrayOptions::setDataType(tmpShapeInfo, arrays[0]->dataType()); - - ShapeDescriptor descriptor(tmpShapeInfo); - RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); - - return true; -} - - -////////////////////////////////////////////////////////////////////////// -// return sorted vector of dimensions common (same) for two arrays, dimensions values corresponds to array with bigger rank -// for example if arr1{2,7}, arr2{2,5,4,7} then vector = {0,3} -std::vector ShapeUtils::getDimsWithSameShape(const NDArray& arr1, const NDArray& arr2) { - - const NDArray *min, *max; - - if(arr1.rankOf() >= arr2.rankOf()) { - max = &arr1; - min = &arr2; - } - 
else { - max = &arr2; - min = &arr1; - } - - const int rankDiff = max->rankOf() - min->rankOf(); - - std::vector dims; - - for (int i = 0; i < min->rankOf(); ++i) - if (min->sizeAt(i) == max->sizeAt(rankDiff + i)) - dims.emplace_back(rankDiff + i); - - return dims; -} - -////////////////////////////////////////////////////////////////////////// -// evaluate shapeInfo for resulting array from tile operation -Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace) { - // check whether reps contains at least one zero (then throw exception) or whether all elements in reps are unities (then simply reshape or do nothing) - int repsSize = reps.size(); - Nd4jLong product = 1; - for(const auto& item : reps) - product *= item; - if(product == 0) - throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !"); - - int rankOld = arr.rankOf(); - int diff = rankOld - repsSize; - - // evaluate new shapeInfo - Nd4jLong* newShapeInfo = nullptr; - if(diff < 0) { - ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(repsSize), Nd4jLong); - newShapeInfo[0] = repsSize; // set new rank - for(int i=1; i <= -diff; ++i) - newShapeInfo[i] = 1; // set unities to be new dimensions at left-hand side of newShapeInfo shape place - memcpy(newShapeInfo + 1 - diff, arr.getShapeInfo() + 1, rankOld*sizeof(Nd4jLong)); // copy old dimensions to the right-hand side of newShapeInfo shape place - for(int i=1; i <= repsSize; ++i) - newShapeInfo[i] *= reps[i - 1]; // set new shape by multiplying old dimensions by corresponding numbers from reps - } - else { - ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(rankOld), Nd4jLong); - memcpy(newShapeInfo, arr.getShapeInfo(), shape::shapeInfoByteLength(rankOld)); // copy all elements of _shapeInfo to newShapeInfo - for(int i=1; i <= repsSize; ++i) - newShapeInfo[rankOld + 1 - i] *= reps[repsSize - i]; // set new shape by multiplying old dimensions by 
corresponding numbers from reps - } - shape::updateStrides(newShapeInfo, arr.ordering()); - ArrayOptions::setDataType(newShapeInfo, arr.dataType()); - - ShapeDescriptor descriptor(newShapeInfo); - RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); -} - - std::vector ShapeUtils::pullShapeFromShapeInfo(Nd4jLong *shapeInfo) { + std::vector ShapeUtils::pullShapeFromShapeInfo(const Nd4jLong *shapeInfo) { std::vector shape(shape::rank(shapeInfo)); int shapeSize = shape.size(); @@ -624,7 +624,7 @@ Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vectorgetShapeInfo(); //Nd4jLong* + auto shapeBuffer = array->shapeInfo(); //Nd4jLong* int rank = (int)*shapeBuffer; result.append("["); for (int e = 0; e < rank; e++) { @@ -724,31 +724,31 @@ std::vector ShapeUtils::shapeAsVector(const Nd4jLong* shapeInfo) { ////////////////////////////////////////////////////////////////////////// // evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal -Nd4jLong* ShapeUtils::evalDiagShapeInfo(const Nd4jLong* shapeInfoConst, sd::memory::Workspace* workspace){ - auto shapeInfo = const_cast(shapeInfoConst); + const Nd4jLong* ShapeUtils::evalDiagShapeInfo(const Nd4jLong* shapeInfoConst, sd::memory::Workspace* workspace){ + auto shapeInfo = const_cast(shapeInfoConst); - const auto rank = shape::rank(shapeInfo); + const auto rank = shape::rank(shapeInfo); - Nd4jLong* outputShapeInfo = nullptr; + Nd4jLong* outputShapeInfo = nullptr; - if(shape::isVector(shapeInfo) || shape::isScalar(shapeInfo)) { - ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2), Nd4jLong); - outputShapeInfo[0] = 2; - outputShapeInfo[1] = outputShapeInfo[2] = shape::length(shapeInfo); + if(shape::isVector(shapeInfo) || shape::isScalar(shapeInfo)) { + ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2), Nd4jLong); + outputShapeInfo[0] = 2; + outputShapeInfo[1] = outputShapeInfo[2] = 
shape::length(shapeInfo); + } + else { + ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2*rank), Nd4jLong); + outputShapeInfo[0] = 2*rank; + for(int i = 1; i <= rank; ++i) + outputShapeInfo[i] = outputShapeInfo[i + rank] = shapeInfo[i]; + } + + ShapeUtils::updateStridesAndType(outputShapeInfo, shapeInfo, shape::order(shapeInfo)); + + auto result = ConstantShapeHelper::getInstance()->createShapeInfo(outputShapeInfo); + RELEASE(outputShapeInfo, workspace); + return result; } - else { - ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2*rank), Nd4jLong); - outputShapeInfo[0] = 2*rank; - for(int i = 1; i <= rank; ++i) - outputShapeInfo[i] = outputShapeInfo[i + rank] = shapeInfo[i]; - } - - ShapeUtils::updateStridesAndType(outputShapeInfo, shapeInfo, shape::order(shapeInfo)); - - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(outputShapeInfo); - RELEASE(outputShapeInfo, workspace); - return result; -} std::vector ShapeUtils::evalBroadcastBackwardAxis(const Nd4jLong *operandShapeInfo, const Nd4jLong *resultShapeInfo) { // rRank >= oRank always !! 
@@ -765,83 +765,82 @@ std::vector ShapeUtils::evalBroadcastBackwardAxis(const Nd4jLong *operandSh } //////////////////////////////////////////////////////////////////////////////// -Nd4jLong* ShapeUtils::matrixProductShape(Nd4jLong* theFirstShape, Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::matrixProductShape(const Nd4jLong* theFirstShape, const Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace) { + auto inA = theFirstShape; + auto inB = theSecondShape; + Nd4jLong *shape; + ALLOCATE(shape, workspace, shape::shapeInfoLength(2), Nd4jLong); - auto inA = theFirstShape; - auto inB = theSecondShape; - Nd4jLong *shape; - ALLOCATE(shape, workspace, shape::shapeInfoLength(2), Nd4jLong); + Nd4jLong* tmpA = ShapeBuilders::copyShapeInfo(inA, true, workspace); + Nd4jLong* tmpB = ShapeBuilders::copyShapeInfo(inB, true, workspace); - Nd4jLong* tmpA = ShapeBuilders::copyShapeInfo(inA, true, workspace); - Nd4jLong* tmpB = ShapeBuilders::copyShapeInfo(inB, true, workspace); + if (shouldTranspondFirst) + shape::transposeInplace(tmpA); - if (shouldTranspondFirst) - shape::transposeInplace(tmpA); - - if (shouldTranspondSecond) - shape::transposeInplace(tmpB); + if (shouldTranspondSecond) + shape::transposeInplace(tmpB); - if (shape::rank(tmpA) == 1 && shape::isMatrix(tmpB)) { - // special case here - shape[0] = 1; - shape[1] = tmpB[2]; - Nd4jLong *newShape = ShapeBuilders::createShapeInfo(dtype, 'f', 2, shape, workspace); - - RELEASE(shape, workspace); - RELEASE(tmpA, workspace); - RELEASE(tmpB, workspace); - - return newShape; - } else if (shape::isScalar(tmpA) && shape::isScalar(tmpB)) { - // just scalar vs scalar - shape[0] = 1; - shape[1] = 1; - } else if (shape::isMatrix(tmpA) && shape::isVector(tmpB)) { - // gemv case - if (shape::rank(tmpB) == 2) { - shape[0] = tmpA[1]; + if 
(shape::rank(tmpA) == 1 && shape::isMatrix(tmpB)) { + // special case here + shape[0] = 1; shape[1] = tmpB[2]; - } else { - // we have new 1D shape here - auto newShape = ShapeBuilders::createVectorShapeInfo(dtype, tmpA[1], workspace); + Nd4jLong *newShape = ShapeBuilders::createShapeInfo(dtype, 'f', 2, shape, workspace); RELEASE(shape, workspace); RELEASE(tmpA, workspace); RELEASE(tmpB, workspace); return newShape; + } else if (shape::isScalar(tmpA) && shape::isScalar(tmpB)) { + // just scalar vs scalar + shape[0] = 1; + shape[1] = 1; + } else if (shape::isMatrix(tmpA) && shape::isVector(tmpB)) { + // gemv case + if (shape::rank(tmpB) == 2) { + shape[0] = tmpA[1]; + shape[1] = tmpB[2]; + } else { + // we have new 1D shape here + auto newShape = ShapeBuilders::createVectorShapeInfo(dtype, tmpA[1], workspace); + + RELEASE(shape, workspace); + RELEASE(tmpA, workspace); + RELEASE(tmpB, workspace); + + return newShape; + } + } else if ((shape::isMatrix(tmpA) && shape::isMatrix(tmpB)) || + (shape::isVector(tmpA) && shape::isMatrix(tmpB)) || + (shape::isColumnVector(tmpA) && shape::isVector(tmpB))) { + // gemm case + shape[0] = tmpA[1]; + shape[1] = tmpB[2]; + } else if ((shape::isVector(tmpA) && shape::isScalar(tmpB)) || + (shape::isScalar(tmpA) && shape::isVector(tmpB))) { + // element-wise + shape[0] = 1; + shape[1] = (int) sd::math::nd4j_max(shape::length(tmpA), shape::length(tmpB)); + } else if (shape::isRowVector(tmpA) && shape::isRowVector(tmpB)) { + // dot case + shape[0] = 1; + shape[1] = 1; + } else if (shape::isRowVector(tmpA) && shape::isColumnVector(tmpB)) { + // dot case + shape[0] = 1; + shape[1] = 1; } - } else if ((shape::isMatrix(tmpA) && shape::isMatrix(tmpB)) || - (shape::isVector(tmpA) && shape::isMatrix(tmpB)) || - (shape::isColumnVector(tmpA) && shape::isVector(tmpB))) { - // gemm case - shape[0] = tmpA[1]; - shape[1] = tmpB[2]; - } else if ((shape::isVector(tmpA) && shape::isScalar(tmpB)) || - (shape::isScalar(tmpA) && shape::isVector(tmpB))) { - 
// element-wise - shape[0] = 1; - shape[1] = (int) sd::math::nd4j_max(shape::length(tmpA), shape::length(tmpB)); - } else if (shape::isRowVector(tmpA) && shape::isRowVector(tmpB)) { - // dot case - shape[0] = 1; - shape[1] = 1; - } else if (shape::isRowVector(tmpA) && shape::isColumnVector(tmpB)) { - // dot case - shape[0] = 1; - shape[1] = 1; + + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'f', 2, shape); + + RELEASE(shape, workspace); + + RELEASE(tmpA, workspace); + RELEASE(tmpB, workspace); + return newShape; } - Nd4jLong *newShape = ShapeBuilders::createShapeInfo(dtype, 'f', 2, shape, workspace); - - RELEASE(shape, workspace); - - RELEASE(tmpA, workspace); - RELEASE(tmpB, workspace); - return newShape; -} - //////////////////////////////////////////////////////////////////////////////// std::vector ShapeUtils::evalPermutFromTo(const std::vector& shapeFrom, const std::vector& shapeTo) { auto rank = shapeFrom.size(); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 2c18615fc..8cde62ea1 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -65,7 +65,7 @@ namespace shape { * the information on an ndarray */ struct ND4J_EXPORT ShapeInformation { - _CUDA_HD ShapeInformation(Nd4jLong *shape_ = nullptr, Nd4jLong *stride_ = nullptr, char order_ = 0, int rank_ = 0, int offset_ = 0, int elementWiseStride_ = 0) + _CUDA_HD ShapeInformation(Nd4jLong* shape_ = nullptr, Nd4jLong *stride_ = nullptr, char order_ = 0, int rank_ = 0, int offset_ = 0, int elementWiseStride_ = 0) : shape(shape_), stride(stride_), order(order_), rank(rank_), offset(offset_), elementWiseStride(elementWiseStride_) {} @@ -93,19 +93,19 @@ namespace shape { ND4J_EXPORT _CUDA_HD bool shapeEquals(const int shape1Rank, const Nd4jLong *shape1, const int shape2Rank, const Nd4jLong *shape2); - ND4J_EXPORT _CUDA_HD Nd4jLong* detachShape(Nd4jLong *originalShape); + ND4J_EXPORT _CUDA_HD const Nd4jLong* 
detachShape(const Nd4jLong *originalShape); - ND4J_EXPORT _CUDA_HD Nd4jLong* copyShape(Nd4jLong *originalShape); + ND4J_EXPORT _CUDA_HD Nd4jLong* copyShape(Nd4jLong const* originalShape); ND4J_EXPORT _CUDA_HD bool shapeEquals(const Nd4jLong *shapeInfo1, const Nd4jLong *shapeInfo2); ND4J_EXPORT _CUDA_HD bool shapeEquals(const Nd4jLong *shapeInfo1, const Nd4jLong *shapeInfo2, const Nd4jLong *shapeInfo3); - ND4J_EXPORT _CUDA_HD bool strideEquals(int shape1Rank,Nd4jLong *shape1,int shape2Rank,Nd4jLong *shape2); + ND4J_EXPORT _CUDA_HD bool strideEquals(int const shape1Rank,Nd4jLong const* shape1,int const shape2Rank, Nd4jLong const* shape2); - ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong *shapeInfo1,Nd4jLong *shapeInfo2); + ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong const* shapeInfo1, Nd4jLong const* shapeInfo2); - ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong *stride1,int rank1,Nd4jLong *stride2,int rank2); + ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong const* stride1,int const rank1, Nd4jLong const* stride2, int const rank2); ND4J_EXPORT _CUDA_HD bool equalsSoft(const Nd4jLong *shapeA, const Nd4jLong *shapeB); @@ -128,7 +128,7 @@ namespace shape { ND4J_EXPORT _CUDA_HD int tadIndexForLinear(int linearIndex, int tadLength); - ND4J_EXPORT _CUDA_HD Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength); + ND4J_EXPORT _CUDA_HD Nd4jLong tadLength(const Nd4jLong *shapeInfo, int *dimension, int dimensionLength); ND4J_EXPORT _CUDA_HD bool canReshape(const int oldRank, Nd4jLong* oldShape, const int newRank, Nd4jLong* newShape, bool isFOrder); @@ -142,17 +142,17 @@ namespace shape { * Get the shape info buffer * for the given rank and shape. 
*/ - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape, Nd4jLong *buffer); /** * Get the shape info buffer * for the given rank and shape. */ - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const* shape); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *output); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const* shape, Nd4jLong *output); #ifdef __CUDACC__ @@ -168,9 +168,9 @@ namespace shape { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank); - ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, Nd4jLong* ret); /** * Computes the standard packed array strides for a given shape. 
@@ -180,9 +180,9 @@ namespace shape { * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank); - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank, Nd4jLong* ret); ND4J_EXPORT _CUDA_HD void updateStrides(Nd4jLong *shape, const char order); ND4J_EXPORT _CUDA_HD void updateStrides(const int rank, const Nd4jLong *shapeOnly, Nd4jLong *stridesOnly, const char order); @@ -199,9 +199,9 @@ namespace shape { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong *shape, int rank, int startNum); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong const *shape, int rank, int startNum); - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong *shape, int rank, int startNum, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong const *shape, int rank, int startNum, Nd4jLong* ret); /** * Computes the standard packed array strides for a given shape. 
@@ -210,9 +210,9 @@ namespace shape { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, int startNum); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const* shape, int rank, int startNum); - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, int startNum, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank, int startNum, Nd4jLong* ret); /** * @param toCopy the shape to copy @@ -244,7 +244,7 @@ namespace shape { * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder); + ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder); /** * Compute the element wise stride @@ -257,11 +257,11 @@ namespace shape { * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder, Nd4jLong *dimension, int dimensionLength); + ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder, Nd4jLong const* dimension, int dimensionLength); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong const* shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(const 
Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer); /** * * @param length @@ -281,7 +281,7 @@ namespace shape { */ ND4J_EXPORT _CUDA_HD void doPermuteSwap(int length, Nd4jLong **shape, int* rearrange); - ND4J_EXPORT _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong *shapeBuffer, int* rearrange); + ND4J_EXPORT _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong const* shapeBuffer, int* rearrange); ND4J_EXPORT _CUDA_HD void permuteShapeBufferInPlace(Nd4jLong *shapeBuffer, int* rearrange, Nd4jLong *out); @@ -304,7 +304,7 @@ namespace shape { ND4J_EXPORT _CUDA_HD Nd4jLong* createPermuteIndexes(int originalRank, int *dimension,int dimensionLength); - ND4J_EXPORT _CUDA_HD Nd4jLong* computeResultShape(Nd4jLong *originalShapeBuffer, int *dimension,int dimensionLength); + ND4J_EXPORT _CUDA_HD Nd4jLong* computeResultShape(const Nd4jLong *originalShapeBuffer, int *dimension,int dimensionLength); /** * This method does inplace transpose of given shapeBuffer @@ -350,7 +350,7 @@ namespace shape { * @param shape the shape of the array * @param rank the rank of cthe shape */ - ND4J_EXPORT _CUDA_HD int isVector(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD int isVector(Nd4jLong const* shape, int rank); /** @@ -363,13 +363,13 @@ namespace shape { ND4J_EXPORT _CUDA_HD int isVector(const Nd4jLong *shapeInfo); - ND4J_EXPORT _CUDA_HD bool isLikeVector(Nd4jLong *shapeInfo, int& posOfNonUnityDim); + ND4J_EXPORT _CUDA_HD bool isLikeVector(Nd4jLong const* shapeInfo, int& posOfNonUnityDim); ND4J_EXPORT _CUDA_HD bool isCommonVector(const Nd4jLong *shapeInfo, int& posOfNonUnityDim); ND4J_EXPORT _CUDA_HD bool isRowVector(const Nd4jLong *shapeInfo); - ND4J_EXPORT _CUDA_HD bool isColumnVector(Nd4jLong *shapeInfo); + ND4J_EXPORT _CUDA_HD bool isColumnVector(Nd4jLong const* shapeInfo); /** * shape - input inShape is shape only, not shapeInfo @@ -401,10 +401,10 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T 
*toCopy); + ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T const* toCopy); template - ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T *toCopy, T *ret); + ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T const* toCopy, T *ret); /** * Return a copy of a buffer. @@ -413,13 +413,13 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD void copyTo(Nd4jLong length, T *from, T *to); + ND4J_EXPORT _CUDA_HD void copyTo(Nd4jLong length, T const* from, T *to); /** * Return a copy of a buffer. * This buffer allocates memory * that must be freed elsewhere. */ - ND4J_EXPORT _CUDA_HD void copyTo(int length, Nd4jLong *from, Nd4jLong *to, Nd4jLong *indexes); + ND4J_EXPORT _CUDA_HD void copyTo(int length, Nd4jLong const* from, Nd4jLong *to, Nd4jLong *indexes); /** * Permute the given strides @@ -566,7 +566,7 @@ namespace shape { * item */ template - ND4J_EXPORT _CUDA_HD void removeIndex(T1 *data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *out); + ND4J_EXPORT _CUDA_HD void removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *out); /** * Return a copy of this array with the @@ -582,7 +582,7 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD T1* removeIndex(T1 *data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength); + ND4J_EXPORT _CUDA_HD T1* removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, Nd4jLong indexesLength); /** * Iterate over a given set of indexes @@ -595,7 +595,7 @@ namespace shape { * indexes should be the indexes to exclude * indexes length should be the length of indexes */ - ND4J_EXPORT _CUDA_HD Nd4jLong* everyIndexBut(Nd4jLong *indexes,int indexesLength,int begin,int end); + ND4J_EXPORT _CUDA_HD Nd4jLong* everyIndexBut(Nd4jLong const* indexes,int indexesLength,int begin,int end); /** * Computes the offset for accessing @@ -641,7 +641,7 @@ namespace shape { * Keep the given indexes * in the data */ - ND4J_EXPORT _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int* 
index, int indexLength, int dataLength); + ND4J_EXPORT _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int const* index, int indexLength, int dataLength); /** * Generate reverse copy of the data @@ -651,13 +651,13 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD T* reverseCopy(T *data, Nd4jLong length); + ND4J_EXPORT _CUDA_HD T* reverseCopy(T const* data, Nd4jLong length); template - ND4J_EXPORT _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong length); + ND4J_EXPORT _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong length); template - ND4J_EXPORT _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong *indexes, Nd4jLong length); + ND4J_EXPORT _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong *indexes, Nd4jLong length); template ND4J_EXPORT _CUDA_H void convertT(T1 *from, T2 *to, Nd4jLong length); @@ -670,7 +670,7 @@ namespace shape { * @return */ template - ND4J_EXPORT _CUDA_HD T* concat(T* arr1, Nd4jLong arr1Length, T* arr2, Nd4jLong arr2Length); + ND4J_EXPORT _CUDA_HD T* concat(T const* arr1, Nd4jLong const arr1Length, T const* arr2, Nd4jLong const arr2Length); /** * @@ -681,7 +681,7 @@ namespace shape { * @return */ template - ND4J_EXPORT _CUDA_HD T* concat(int numArrays, int numTotalElements, Nd4jLong **arr, Nd4jLong *lengths); + ND4J_EXPORT _CUDA_HD T* concat(int const numArrays, int const numTotalElements, Nd4jLong const**arr, Nd4jLong const* lengths); /** * Get the length per slice of the @@ -695,7 +695,7 @@ namespace shape { * @return the length per slice of the given shape * along the given dimension */ - ND4J_EXPORT _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong *shape, int *dimension, int dimensionLength); + ND4J_EXPORT _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong const* shape, int const* dimension, int dimensionLength); /** * calculates the offset for a tensor @@ -706,10 +706,10 @@ namespace shape { */ ND4J_EXPORT _CUDA_HD Nd4jLong sliceOffsetForTensor(int rank, int index, - Nd4jLong *shape, - Nd4jLong 
*tensorShape, + Nd4jLong const* shape, + Nd4jLong const* tensorShape, int tensorShapeLength, - int *dimension, + int const *dimension, int dimensionLength); /** @@ -1095,7 +1095,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * Length of a tad given * the shape information */ - INLINEDEF _CUDA_HD Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong tadLength(const Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { if(dimensionLength == 1) { return shape::shapeOf(shapeInfo)[dimension[0]]; } @@ -1166,7 +1166,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } - INLINEDEF _CUDA_HD bool strideEquals(int shape1Rank,Nd4jLong *shape1,int shape2Rank,Nd4jLong *shape2) { + INLINEDEF _CUDA_HD bool strideEquals(int const shape1Rank, Nd4jLong const* shape1,int const shape2Rank,Nd4jLong const* shape2) { if(shape1Rank != shape2Rank) return false; //rank not equals @@ -1178,12 +1178,12 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return true; } - INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong *shapeInfo1,Nd4jLong *shapeInfo2) { + INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong const* shapeInfo1,Nd4jLong const* shapeInfo2) { return shape::strideEquals(shape::rank(shapeInfo1),shape::stride(shapeInfo1),shape::rank(shapeInfo2),shape::stride(shapeInfo2)); } - INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong *stride1,int rank1 , Nd4jLong *stride2, int rank2) { + INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong const* stride1,int const rank1 , Nd4jLong const* stride2, int const rank2) { if(rank1 != rank2) return false; @@ -1195,7 +1195,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return true; } - INLINEDEF _CUDA_HD Nd4jLong *computeResultShape(Nd4jLong *originalShapeBuffer, int* dimension,int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong *computeResultShape(Nd4jLong const* originalShapeBuffer, int * dimension,int 
dimensionLength) { Nd4jLong *retShape; int retShapeLength; if(dimensionLength == 1 && dimension[0] == 2147483647) { @@ -1236,7 +1236,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } - INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer) { + INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(const Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer) { Nd4jLong *theShape = shape::shapeOf(shapeInfo); Nd4jLong *theStride = shape::stride(shapeInfo); int rank = dimensionLength == 1 ? 2 : dimensionLength; @@ -1279,7 +1279,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } else { - Nd4jLong *newIndexes = dimension; + Nd4jLong *newIndexes = dimension; if(reverseCopyStride) shape::reverseCopyTo(theStride, retStride, newIndexes, len); else @@ -1293,7 +1293,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return ret; } - INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride) { + INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(const Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride) { int rank = dimensionLength == 1 ? 
2 : dimensionLength; traceNew(4); @@ -1330,7 +1330,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, int startNum) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, int startNum) { if (isVector(shape, rank)) { traceNew(5); @@ -1356,7 +1356,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return stride; } - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, int startNum, Nd4jLong *ret) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, int startNum, Nd4jLong *ret) { if (isVector(shape, rank)) { for (int i = 0; i < rank; i++) ret[i] = 1; @@ -1382,7 +1382,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong *shape, int rank, int startNum) { + INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong const *shape, int rank, int startNum) { traceNew(7); @@ -1410,7 +1410,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return stride; } - INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong *shape, int rank, int startNum, Nd4jLong* ret) { + INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong const* shape, int rank, int startNum, Nd4jLong* ret) { if (rank == 1) { ret[0] = 1; return ret; @@ -1439,11 +1439,11 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank) { return 
calcStridesFortran(shape, rank, 1); } - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, Nd4jLong* ret) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, Nd4jLong* ret) { return calcStridesFortran(shape, rank, 1, ret); } @@ -1454,11 +1454,11 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank) { return calcStrides(shape, rank, 1); } - INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, Nd4jLong* ret) { + INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank, Nd4jLong* ret) { return calcStrides(shape, rank, 1, ret); } @@ -1541,7 +1541,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return copy; } - INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder) { + INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder) { if (rank == 0) return 1; @@ -1690,8 +1690,8 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } - INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder, - Nd4jLong *dimension, int dimensionLength) { + INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder, + Nd4jLong const* dimension, int dimensionLength) { if(dimensionLength == 1) { return stride[dimension[0]]; } @@ -1703,13 +1703,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * Get the shape info buffer * for the given rank and shape. 
*/ - INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape) { Nd4jLong *stride = shape::calcStrides(shape, rank); traceNew(11); auto shapeInfo = new shape::ShapeInformation(); - shapeInfo->shape = shape; + shapeInfo->shape = const_cast(shape); shapeInfo->stride = stride; shapeInfo->offset = 0; shapeInfo->rank = rank; @@ -1728,13 +1728,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * * This method is used only for SoftMax */ - INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *buffer) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape, Nd4jLong *buffer) { Nd4jLong stride[MAX_RANK]; shape::calcStrides(shape,rank, stride); shape::ShapeInformation shapeInfo; - shapeInfo.shape = shape; + shapeInfo.shape = const_cast(shape); shapeInfo.stride = stride; shapeInfo.offset = 0; shapeInfo.rank = rank; @@ -1751,13 +1751,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * Get the shape info buffer * for the given rank and shape. 
*/ - INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const* shape) { auto stride = shape::calcStridesFortran(shape,rank); traceNew(12); auto shapeInfo = new shape::ShapeInformation(); - shapeInfo->shape = shape; + shapeInfo->shape = const_cast(shape); shapeInfo->stride = stride; shapeInfo->offset = 0; shapeInfo->rank = rank; @@ -1772,13 +1772,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return shapeInfoBuffer; } - INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *output) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const *shape, Nd4jLong *output) { Nd4jLong stride[MAX_RANK]; shape::calcStridesFortran(shape,rank, stride); shape::ShapeInformation shapeInfo; - shapeInfo.shape = shape; + shapeInfo.shape = const_cast(shape); shapeInfo.stride = stride; shapeInfo.offset = 0; shapeInfo.rank = rank; @@ -2049,7 +2049,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn shape::doPermuteShapeInfo(out, rearrange); } - INLINEDEF _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong *shapeBuffer, int* rearrange) { + INLINEDEF _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong const* shapeBuffer, int* rearrange) { auto len = shape::shapeInfoLength(shape::rank(shapeBuffer)); Nd4jLong *copy = shape::copyOf(len, shapeBuffer); shape::doPermuteShapeInfo(copy,rearrange); @@ -2238,7 +2238,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn * @param shape the shape of the array * @param rank the rank of the shape */ - INLINEDEF _CUDA_HD int isVector(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD int isVector(Nd4jLong const* shape, int rank) { if (rank == 0) return 0; @@ -2254,7 +2254,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* 
lShapeIn return 0; } - INLINEDEF _CUDA_HD bool isLikeVector(Nd4jLong *shapeInfo, int& posOfNonUnityDim) { + INLINEDEF _CUDA_HD bool isLikeVector(Nd4jLong const* shapeInfo, int& posOfNonUnityDim) { int numOfNonUnity = 0; for(int i = 1; i <= shapeInfo[0]; ++i) { @@ -2284,7 +2284,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn return numOfNonUnity == 1; } - INLINEDEF _CUDA_H Nd4jLong* detachShape(Nd4jLong *originalShape) { + INLINEDEF _CUDA_H Nd4jLong const* detachShape(Nd4jLong const* originalShape) { Nd4jLong *newShape = new Nd4jLong[shape::shapeInfoLength(originalShape)]; memcpy(newShape, originalShape, shape::shapeInfoByteLength(originalShape)); @@ -2292,7 +2292,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn } - INLINEDEF _CUDA_H Nd4jLong* copyShape(Nd4jLong *originalShape) { + INLINEDEF _CUDA_H Nd4jLong* copyShape(Nd4jLong const* originalShape) { Nd4jLong *newShape = new Nd4jLong[shape::shapeInfoLength(originalShape)]; memcpy(newShape, originalShape, shape::shapeInfoByteLength(originalShape)); @@ -2309,7 +2309,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn return isVector && shapeFirstOne; } - INLINEDEF _CUDA_HD bool isColumnVector(Nd4jLong *shapeInfo) { + INLINEDEF _CUDA_HD bool isColumnVector(const Nd4jLong *shapeInfo) { bool isVector = shape::isVector(shapeInfo) == 1; bool shapeFirstOne = shape::shapeOf(shapeInfo)[0] == 1; return isVector && !shapeFirstOne; @@ -2381,7 +2381,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * that must be freed elsewhere. 
*/ template - INLINEDEF _CUDA_HD T *copyOf(Nd4jLong length, T *toCopy) { + INLINEDEF _CUDA_HD T *copyOf(Nd4jLong length, T const* toCopy) { traceNew(18); T *ret = new T[length]; @@ -2389,7 +2389,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) } template - INLINEDEF _CUDA_HD T* copyOf(Nd4jLong length, T *toCopy, T *ret) { + INLINEDEF _CUDA_HD T* copyOf(Nd4jLong length, T const* toCopy, T *ret) { memcpy(ret, toCopy, sizeof(T)*length); return ret; } @@ -2400,7 +2400,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * that must be freed elsewhere. */ template - INLINEDEF _CUDA_HD void copyTo(Nd4jLong length, T *from, T *to) { + INLINEDEF _CUDA_HD void copyTo(Nd4jLong length, T const* from, T *to) { memcpy(to, from, sizeof(T)*length); } @@ -2409,7 +2409,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * This buffer allocates memory * that must be freed elsewhere. */ - INLINEDEF _CUDA_HD void copyTo(int length, Nd4jLong *from, Nd4jLong *to, Nd4jLong *indexes) { + INLINEDEF _CUDA_HD void copyTo(int length, Nd4jLong const* from, Nd4jLong *to, Nd4jLong *indexes) { for(int i = 0; i < length; i++) { to[i] = from[indexes[i]]; } @@ -2817,7 +2817,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * item */ template - INLINEDEF _CUDA_HD void removeIndex(T1* data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *ret) { + INLINEDEF _CUDA_HD void removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *ret) { int count = 0; int absLength = dataLength - indexesLength; @@ -2850,7 +2850,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * item */ template - INLINEDEF _CUDA_HD T1* removeIndex(T1 *data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength) { + INLINEDEF _CUDA_HD T1* removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, 
Nd4jLong indexesLength) { auto lengthOfArr = dataLength - indexesLength; if(lengthOfArr < 0) { printf("Remove index call created a <= 0 length array. This was likely not intended."); @@ -2862,7 +2862,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) return ret; } - INLINEDEF _CUDA_HD Nd4jLong* everyIndexBut(Nd4jLong *indexes,int indexesLength,int begin,int end) { + INLINEDEF _CUDA_HD Nd4jLong* everyIndexBut(const Nd4jLong *indexes,int indexesLength,int begin,int end) { int len = end - indexesLength; traceNew(20); @@ -3086,7 +3086,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @param dataLength * @return */ - INLINEDEF _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int* index, int indexLength, int dataLength) { + INLINEDEF _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int const* index, int indexLength, int dataLength) { traceNew(23); @@ -3113,7 +3113,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons */ template - INLINEDEF _CUDA_HD T* reverseCopy(T *data, Nd4jLong length) { + INLINEDEF _CUDA_HD T* reverseCopy(T const* data, Nd4jLong length) { if (length < 1) return nullptr; @@ -3129,7 +3129,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons } template - INLINEDEF _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong length) { + INLINEDEF _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong length) { if (length < 1) return; for (Nd4jLong i = 0; i <= length / 2; i++) { @@ -3140,7 +3140,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons } template - INLINEDEF _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong *indexes, Nd4jLong length) { + INLINEDEF _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong *indexes, Nd4jLong length) { if (length < 1) return; @@ -3161,7 +3161,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @return */ 
template - INLINEDEF _CUDA_HD T* concat(T* arr1, Nd4jLong arr1Length, T* arr2, Nd4jLong arr2Length) { + INLINEDEF _CUDA_HD T* concat(T const* arr1, Nd4jLong const arr1Length, T const* arr2, Nd4jLong const arr2Length) { traceNew(25); @@ -3180,7 +3180,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @return */ template - INLINEDEF _CUDA_HD T *concat(Nd4jLong numArrays, Nd4jLong numTotalElements, T **arr, Nd4jLong *lengths) { + INLINEDEF _CUDA_HD T *concat(Nd4jLong const numArrays, Nd4jLong const numTotalElements, T const **arr, Nd4jLong const *lengths) { T* ret = new T[numTotalElements]; Nd4jLong count = 0; @@ -3206,7 +3206,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @return the length per slice of the given shape * along the given dimension */ - INLINEDEF _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong *shape, int* dimension, int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong const* shape, int const* dimension, int dimensionLength) { if(shape::isVector(shape,rank)) { //return total length for row vectors if(dimensionLength == 1 && shape[0] == 1) { @@ -3230,7 +3230,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @param tensorShape * @return */ - INLINEDEF _CUDA_HD Nd4jLong sliceOffsetForTensor(int rank, int index, Nd4jLong *shape, Nd4jLong *tensorShape, int tensorShapeLength, int* dimension, int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong sliceOffsetForTensor(int rank, int index, Nd4jLong const* shape, Nd4jLong const* tensorShape, int tensorShapeLength, int const* dimension, int dimensionLength) { auto tensorLength = prodLong(tensorShape, tensorShapeLength); auto lengthPerSlice2 = lengthPerSlice(rank, shape, dimension, dimensionLength); if (lengthPerSlice2 <= 0) { diff --git a/libnd4j/include/legacy/NativeOpExecutioner.h b/libnd4j/include/legacy/NativeOpExecutioner.h index 4d55a3357..84ab886c4 100644 
--- a/libnd4j/include/legacy/NativeOpExecutioner.h +++ b/libnd4j/include/legacy/NativeOpExecutioner.h @@ -47,11 +47,11 @@ public: */ static void execIndexReduceScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); /** * @@ -68,13 +68,13 @@ public: */ static void execReduce3Scalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); /** @@ -90,13 +90,13 @@ public: */ static void execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); /** * @@ -113,29 +113,29 @@ public: */ static void execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong 
*dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadOnlyShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + const Nd4jLong *xTadOnlyShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); static void execReduce3All(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets); /** * @@ -150,13 +150,13 @@ public: */ static void execIndexReduce(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + const Nd4jLong 
*tadShapeInfo, const Nd4jLong *tadOffsets); /** * @@ -170,73 +170,76 @@ public: * @param n */ static void execScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism = true); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism = true); static void execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism = true); + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism = true); static void execScalarInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism = true); + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong 
*dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism = true); static void execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ); static void execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); static void execScalarInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong 
*hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); /** @@ -252,105 +255,107 @@ static void execScalarInt(sd::LaunchContext *lc, * @param dimensionLength */ static void execBroadcast(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ); static void execBroadcast(sd::LaunchContext* lc, - const int opNum, - const void *hX, const Nd4jLong *hXShapeInfo, - const void *dX, const Nd4jLong *dXShapeInfo, - 
const void *hY, const Nd4jLong *hYShapeInfo, - const void *dY, const Nd4jLong *dYShapeInfo, - void *hZ, const Nd4jLong *hZShapeInfo, - void *dZ, const Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execInverseBroadcast(sd::LaunchContext *lc, int opNum, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); static void execBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong 
*dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ); - static void execBroadcastBool(sd::LaunchContext* lc, const int opNum, - const void *hX, const Nd4jLong *hXShapeInfo, - const void *dX, const Nd4jLong *dXShapeInfo, - const void *hY, const Nd4jLong *hYShapeInfo, - const void *dY, const Nd4jLong *dYShapeInfo, + static void execBroadcastBool(sd::LaunchContext* lc, + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, void *hZ, const Nd4jLong *hZShapeInfo, void *dZ, const Nd4jLong *dZShapeInfo, void *extraParams); static void execInverseBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); static void execBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void 
*dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ); - static void execBroadcastInt(sd::LaunchContext* lc, const int opNum, - const void *hX, const Nd4jLong *hXShapeInfo, - const void *dX, const Nd4jLong *dXShapeInfo, - const void *hY, const Nd4jLong *hYShapeInfo, - const void *dY, const Nd4jLong *dYShapeInfo, - void *hZ, const Nd4jLong *hZShapeInfo, - void *dZ, const Nd4jLong *dZShapeInfo); + static void execBroadcastInt(sd::LaunchContext* lc, + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execInverseBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *dZ, 
const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); /** * @@ -365,34 +370,34 @@ static void execScalarInt(sd::LaunchContext *lc, * @param n */ static void execPairwiseTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams); static void execPairwiseBoolTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams); static void execPairwiseIntTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong 
*dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams); /** * @@ -405,49 +410,50 @@ static void execScalarInt(sd::LaunchContext *lc, * @param n */ static void execTransformFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execTransformAny(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool allowParallelism = true); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool allowParallelism = true); static void execTransformStrict(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execTransformSame(sd::LaunchContext *lc, - int opNum, - void *hX, 
Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execTransformBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); /** * * @param opNum @@ -458,44 +464,44 @@ static void execTransformBool(sd::LaunchContext *lc, * @param resultShapeInfo */ static void execReduceFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execReduceSame(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int 
*dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execReduceBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execReduceLong(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); /** * @@ -506,49 +512,49 @@ static void execTransformBool(sd::LaunchContext *lc, * @return */ static void execReduceFloatScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const 
Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduceBoolScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduceSameScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduceLongScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduce3TAD(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yTadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + 
const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffsets); /** * @@ -562,15 +568,15 @@ static void execTransformBool(sd::LaunchContext *lc, * @param dimensionLength */ static void execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - bool biasCorrected); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool biasCorrected); /** * @@ -582,13 +588,13 @@ static void execTransformBool(sd::LaunchContext *lc, * @param resultShapeInfo */ static void execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected); /** * @@ -600,68 +606,51 @@ static void execTransformBool(sd::LaunchContext *lc, * @param resultShapeInfo */ static void execSummaryStatsScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void 
*dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected); static void execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); + int opNum, + Nd4jPointer state, + void *hZ, const Nd4jLong *hZShapeBuffer, + void *dZ, const Nd4jLong *dZShapeBuffer, + void *extraArguments); static void execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeBuffer, + const void *dX, const Nd4jLong *dXShapeBuffer, + void *hZ, const Nd4jLong *hZShapeBuffer, + void *dZ, const Nd4jLong *dZShapeBuffer, + void *extraArguments); static void execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hY, Nd4jLong *hYShapeBuffer, - void *dY, Nd4jLong *dYShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeBuffer, + const void *dX, const Nd4jLong *dXShapeBuffer, + const void *hY, const Nd4jLong *hYShapeBuffer, + const void *dY, const Nd4jLong *dYShapeBuffer, + void *hZ, const Nd4jLong *hZShapeBuffer, + void *dZ, const Nd4jLong *dZShapeBuffer, + void *extraArguments); - template - static FORCEINLINE void execAggregate(sd::LaunchContext *lc, - int opNum, - void **varguments, - int numArguments, - 
Nd4jLong **shapeArguments, - int numShapeArguments, - int *indexArguments, - int numIndexArguments, - int **intArrays, - int numIntArrays, - void *vrealArguments, - int numRealArguments) { - - } - - - inline static void execSort(void *x, Nd4jLong *xShapeInfo, bool descending) { + inline static void execSort(void *x, const Nd4jLong *xShapeInfo, bool descending) { auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, sd::SpecialMethods, ::sortGeneric(x, xShapeInfo, descending), LIBND4J_TYPES); } - static void execSort(void *x, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { + static void execSort(void *x, const Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, bool descending) { auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, sd::SpecialMethods, ::sortTadGeneric(x, xShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending), LIBND4J_TYPES); @@ -672,13 +661,13 @@ static void execTransformBool(sd::LaunchContext *lc, } - inline static Nd4jLong encodeBitmap(void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold) { + inline static Nd4jLong encodeBitmap(void *dx, const Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold) { auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, return sd::SpecialMethods, ::encodeBitmapGeneric(dx, xShapeInfo, N, dz, threshold), FLOAT_TYPES); } - inline static void decodeBitmap(void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo) { + inline static void decodeBitmap(const void *dx, Nd4jLong N, void *dz, const Nd4jLong *zShapeInfo) { auto zType = sd::ArrayOptions::dataType(zShapeInfo); BUILD_SINGLE_SELECTOR(zType, sd::SpecialMethods, ::decodeBitmapGeneric(dx, N, dz, zShapeInfo), FLOAT_TYPES); diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h 
index 97aa5a402..17affd1c3 100755 --- a/libnd4j/include/legacy/NativeOps.h +++ b/libnd4j/include/legacy/NativeOps.h @@ -122,9 +122,9 @@ ND4J_EXPORT void setTADThreshold(int num); */ ND4J_EXPORT void execIndexReduceScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * @@ -139,10 +139,10 @@ ND4J_EXPORT void execIndexReduceScalar(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execIndexReduce(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); /** * @@ -159,20 +159,20 @@ ND4J_EXPORT void execIndexReduce(Nd4jPointer *extraPointers, ND4J_EXPORT void execBroadcast( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + 
OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execBroadcastBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); /** * @@ -189,17 +189,17 @@ ND4J_EXPORT void execBroadcastBool( ND4J_EXPORT void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execPairwiseTransformBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* 
dZShapeInfo, void *extraParams); /** @@ -213,28 +213,28 @@ ND4J_EXPORT void execPairwiseTransformBool( */ ND4J_EXPORT void execReduceFloat(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); ND4J_EXPORT void execReduceSame(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); ND4J_EXPORT void execReduceBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); ND4J_EXPORT void execReduceLong(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * @@ -247,34 +247,34 @@ ND4J_EXPORT void execReduceLong(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, 
Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execReduceSame2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execReduceBool2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execReduceLong2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, 
Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); /** * @@ -289,10 +289,10 @@ ND4J_EXPORT void execReduceLong2(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduce3(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * @@ -305,10 +305,10 @@ ND4J_EXPORT void execReduce3(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduce3Scalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * * @param opNum @@ -324,24 +324,24 @@ ND4J_EXPORT void execReduce3Scalar(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong 
*hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets); ND4J_EXPORT void execReduce3All(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets); /** * @@ -356,16 +356,16 @@ ND4J_EXPORT void execReduce3All(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hSscalarShapeInfo, Nd4jLong *dSscalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, 
Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hSscalarShapeInfo, Nd4jLong const* dSscalarShapeInfo, void *extraParams); ND4J_EXPORT void execScalarBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hSscalarShapeInfo, Nd4jLong *dSscalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hSscalarShapeInfo, Nd4jLong const* dSscalarShapeInfo, void *extraParams); /** @@ -377,9 +377,9 @@ ND4J_EXPORT void execScalarBool(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected); /** * @@ -392,9 +392,9 @@ ND4J_EXPORT void execSummaryStatsScalar(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execSummaryStats(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected); /** * @@ -409,12 +409,12 @@ ND4J_EXPORT void execSummaryStats(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong 
*hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); /** * @@ -428,32 +428,32 @@ ND4J_EXPORT void execSummaryStatsTad(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execTransformFloat(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformSame(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformAny(Nd4jPointer *extraPointers, int opNum, - 
OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformStrict(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); /** @@ -471,23 +471,23 @@ ND4J_EXPORT void execTransformStrict(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execScalarTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ); ND4J_EXPORT void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong 
*hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ); ND4J_EXPORT void specialConcat ( Nd4jPointer *extraPointers, @@ -496,7 +496,7 @@ ND4J_EXPORT void specialConcat ( Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *result, - Nd4jLong *resultShapeInfo, + Nd4jLong const* resultShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers); @@ -792,14 +792,14 @@ typedef sd::TadPack OpaqueTadPack; * @param targetBuffer * @param offsetsBuffer */ -ND4J_EXPORT OpaqueTadPack* tadOnlyShapeInfo(Nd4jLong *xShapeInfo, +ND4J_EXPORT OpaqueTadPack* tadOnlyShapeInfo(Nd4jLong const*xShapeInfo, int *dimension, int dimensionLength); -ND4J_EXPORT Nd4jLong* getPrimaryShapeInfo(OpaqueTadPack* pack); -ND4J_EXPORT Nd4jLong* getPrimaryOffsets(OpaqueTadPack* pack); -ND4J_EXPORT Nd4jLong* getSpecialShapeInfo(OpaqueTadPack* pack); -ND4J_EXPORT Nd4jLong* getSpecialOffsets(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getPrimaryShapeInfo(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getPrimaryOffsets(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getSpecialShapeInfo(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getSpecialOffsets(OpaqueTadPack* pack); ND4J_EXPORT Nd4jLong getNumberOfTads(OpaqueTadPack* pack); 
ND4J_EXPORT int getShapeInfoLength(OpaqueTadPack* pack); @@ -824,14 +824,14 @@ ND4J_EXPORT void deleteTadPack(OpaqueTadPack* ptr); * @param zTadOffsets */ ND4J_EXPORT void pullRows(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *zShapeInfo, Nd4jLong *dzShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dxShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* zShapeInfo, Nd4jLong const* dzShapeInfo, Nd4jLong n, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets); + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets); /** * @@ -843,20 +843,20 @@ ND4J_EXPORT void pullRows(Nd4jPointer *extraPointers, * @param propagate */ ND4J_EXPORT void average(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dxShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length, bool propagate); ND4J_EXPORT void accumulate(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dxShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length); @@ -1004,7 +1004,7 @@ ND4J_EXPORT void execAggregateBatch(Nd4jPointer *extraPointers, ND4J_EXPORT void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeBuffer, Nd4jLong *dZShapeBuffer, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeBuffer, Nd4jLong 
const* dZShapeBuffer, void *extraArguments); /** @@ -1023,9 +1023,9 @@ ND4J_EXPORT void execRandom(Nd4jPointer *extraPointers, ND4J_EXPORT void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeBuffer, Nd4jLong *dXShapeBuffer, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeBuffer, Nd4jLong *dYShapeBuffer, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeBuffer, Nd4jLong *dZShapeBuffer, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeBuffer, Nd4jLong const* dXShapeBuffer, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeBuffer, Nd4jLong const* dYShapeBuffer, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeBuffer, Nd4jLong const* dZShapeBuffer, void *extraArguments); /** @@ -1042,8 +1042,8 @@ ND4J_EXPORT void execRandom3(Nd4jPointer *extraPointers, ND4J_EXPORT void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeBuffer, Nd4jLong *dXShapeBuffer, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeBuffer, Nd4jLong *dZShapeBuffer, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeBuffer, Nd4jLong const* dXShapeBuffer, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeBuffer, Nd4jLong const* dZShapeBuffer, void *extraArguments); @@ -1098,11 +1098,11 @@ ND4J_EXPORT void destroyRandom(Nd4jPointer ptrRandom); */ template -static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { - Nd4jLong *shapeBufferCast = reinterpret_cast(shapeBuffer); +static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,const Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong* headerSize) { + Nd4jLong const* shapeBufferCast = reinterpret_cast(shapeBuffer); int rank = shape::rank(shapeBufferCast); - Nd4jLong *shape = shape::shapeOf(shapeBufferCast); - unsigned int *npShape = new unsigned int[rank]; + const Nd4jLong* shape = shape::shapeOf(shapeBufferCast); + unsigned int* npShape = new unsigned int[rank]; for(int i = 0; i < rank; i++) { 
npShape[i] = shape[i]; } @@ -1125,7 +1125,7 @@ static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer, extern "C" { -static Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { +static Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong* headerSize) { auto shapeBufferCast = reinterpret_cast(shapeBuffer); auto type = sd::ArrayOptions::dataType(shapeBufferCast); BUILD_SINGLE_SELECTOR(type, return _numpyHeaderForNd4j, (data, shapeBuffer, wordSize, headerSize), LIBND4J_TYPES); @@ -1427,53 +1427,53 @@ ND4J_EXPORT Nd4jPointer pointerForAddress(Nd4jLong address); * @return */ ND4J_EXPORT void tear(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, - Nd4jPointer *targets, Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dxShapeInfo, + Nd4jPointer *targets, Nd4jLong const* zShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets); ND4J_EXPORT void sort(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, bool descending); ND4J_EXPORT void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending); ND4J_EXPORT void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, 
Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending); ND4J_EXPORT void sortTad(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, bool descending); ND4J_EXPORT void sortTadByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending); ND4J_EXPORT void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending); @@ -1509,7 +1509,7 @@ ND4J_EXPORT OpaqueShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, N ND4J_EXPORT OpaqueShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, int *dArgs, int numDArgs); ND4J_EXPORT Nd4jLong getShapeListSize(OpaqueShapeList* list); -ND4J_EXPORT Nd4jLong* getShape(OpaqueShapeList* list, Nd4jLong i); +ND4J_EXPORT Nd4jLong const* getShape(OpaqueShapeList* list, Nd4jLong i); ND4J_EXPORT void deleteShapeList(Nd4jPointer shapeList); @@ -1526,7 +1526,7 @@ ND4J_EXPORT OpaqueVariable* 
getVariable(OpaqueVariablesSet* set, Nd4jLong i); ND4J_EXPORT int getVariableId(OpaqueVariable* variable); ND4J_EXPORT int getVariableIndex(OpaqueVariable* variable); ND4J_EXPORT const char* getVariableName(OpaqueVariable* variable); -ND4J_EXPORT Nd4jLong* getVariableShape(OpaqueVariable* variable); +ND4J_EXPORT Nd4jLong const* getVariableShape(OpaqueVariable* variable); ND4J_EXPORT void* getVariableBuffer(OpaqueVariable* variable); ND4J_EXPORT int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId); @@ -1545,7 +1545,7 @@ ND4J_EXPORT void deleteGraphState(Nd4jPointer state); ND4J_EXPORT void deleteResultWrapper(Nd4jPointer ptr); -ND4J_EXPORT int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer x, Nd4jLong *xShapeInfo, int N, float threshold); +ND4J_EXPORT int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer x, Nd4jLong const* xShapeInfo, int N, float threshold); // this method executes op that requires scope to be present: if/while/cond/whatever ND4J_EXPORT Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs); @@ -1557,11 +1557,11 @@ ND4J_EXPORT char* getUtf8StringBuffer(Nd4jPointer *extraPointers, Nd4jPointer pt ND4J_EXPORT void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr); ND4J_EXPORT void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* hIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo); + void* hX, Nd4jLong const* hXShapeInfo, Nd4jLong const* hXOffsets, + void* dX, Nd4jLong const* dXShapeInfo, Nd4jLong const* dXOffsets, 
+ void* hY, Nd4jLong const* hYShapeInfo, Nd4jLong const* hYOffsets, + void* dY, Nd4jLong const* dYShapeInfo, Nd4jLong const* dYOffsets, + void* hIindexes, Nd4jLong const* hIndicesShapeInfo, void* dIindexes, Nd4jLong const* dIndicesShapeInfo); ND4J_EXPORT void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo); @@ -1570,7 +1570,7 @@ typedef sd::ConstantDataBuffer OpaqueConstantDataBuffer; ND4J_EXPORT OpaqueConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty); -ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong *data, int length); +ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong const* data, int length); ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, int length); ND4J_EXPORT OpaqueConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescriptor *descriptor); diff --git a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp index b3f15e345..ad75922e4 100644 --- a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp @@ -77,11 +77,11 @@ * @param hZShapeInfo */ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { @@ -106,22 +106,21 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, int opNu */ void 
NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); - Nd4jLong* hz = reinterpret_cast(hZ); + auto hz = reinterpret_cast(hZ); BUILD_DOUBLE_SELECTOR(xType, zType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, extraParams, hz, hZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES, INDEXING_TYPES); -// BUILD_SINGLE_SELECTOR(xType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES); } //////////////////////////////////////////////////////////////////////// @@ -139,16 +138,16 @@ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, */ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const 
Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { @@ -230,15 +229,15 @@ void NativeOpExecutioner::execBroadcast(sd::LaunchContext* lc, const int opNum, void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -269,17 +268,17 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong 
*tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { if (shape::isEmpty(hXShapeInfo) || shape::isEmpty(hYShapeInfo)) @@ -320,17 +319,17 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext* lc, const int opN void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -358,16 +357,16 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong 
*hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -422,16 +421,16 @@ void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, const int opN } void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -471,14 +470,14 @@ void 
NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, * @param n */ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -504,14 +503,14 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -538,14 +537,14 @@ void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, 
Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -580,14 +579,14 @@ void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { @@ -609,14 +608,14 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const 
Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -637,14 +636,14 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -665,14 +664,14 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -701,12 +700,12 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, * @return */ void 
NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -717,12 +716,12 @@ void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -732,14 +731,12 @@ void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { - - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -749,13 +746,12 @@ void 
NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -779,14 +775,15 @@ void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, * @param dimensionLength */ void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { + auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -807,15 +804,14 @@ void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { - + int 
opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -826,17 +822,17 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *xTadOnlyShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadOnlyShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -867,18 +863,17 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - 
void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -895,19 +890,17 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yTadOffsets) { - - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -948,14 +941,15 @@ void 
NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, * @param n */ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, - void *extraParams, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hScalarShapeInfo, + const void *dScalar, const Nd4jLong *dScalarShapeInfo, + void *extraParams, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -983,16 +977,16 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const*hXShapeInfo, + void const* dX, Nd4jLong const*dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const*hZShapeInfo, + void *dZ, Nd4jLong const*dZShapeInfo, + void const* hScalars, Nd4jLong const*hScalarShapeInfo, + void const* dScalars, Nd4jLong const*dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const*tadShapeInfo, Nd4jLong const*tadOffsets, + Nd4jLong const*tadShapeInfoZ, Nd4jLong const*tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = 
sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -1019,14 +1013,15 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hSscalarShapeInfo); @@ -1052,17 +1047,17 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, + int *dimension, 
int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -1087,14 +1082,15 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hSscalarShapeInfo); @@ -1120,17 +1116,17 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + 
void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -1164,13 +1160,13 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1190,13 +1186,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1218,15 +1214,15 @@ void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, * @param dimensionLength */ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, - 
int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - bool biasCorrected) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool biasCorrected) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1246,13 +1242,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, * @param n */ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1268,13 +1264,13 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const 
Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1290,13 +1286,14 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1319,13 +1316,13 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = 
sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1341,13 +1338,13 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1363,11 +1360,11 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraArguments) { + int opNum, + Nd4jPointer state, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraArguments) { auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1380,14 +1377,13 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraArguments) { - + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeInfo, + const void 
*dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraArguments) { auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1399,16 +1395,15 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraArguments) { - + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraArguments) { auto xType = sd::ArrayOptions::dataType(hZShapeInfo); diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp index 5e810c6de..799351ccc 100644 --- a/libnd4j/include/legacy/cpu/NativeOps.cpp +++ b/libnd4j/include/legacy/cpu/NativeOps.cpp @@ -102,9 +102,9 @@ void setTADThreshold(int num) { */ void execIndexReduceScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execIndexReduceScalar(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParams, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo); } catch (std::exception &e) { @@ -125,10 +125,10 @@ void 
execIndexReduceScalar(Nd4jPointer *extraPointers, * @param dimensionLength */ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -176,18 +176,16 @@ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, */ void execBroadcast(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, - 
dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -216,19 +214,17 @@ void execBroadcast(Nd4jPointer *extraPointers, void execBroadcastBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, - dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -272,9 +268,9 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void 
execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execPairwiseTransform(nullptr, @@ -301,9 +297,9 @@ void execPairwiseTransform( void execPairwiseTransformBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { @@ -340,9 +336,9 @@ void execPairwiseTransformBool( void execReduceFloat( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceFloatScalar(nullptr, @@ -365,9 +361,9 @@ void execReduceFloat( void execReduceSame( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - 
OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceSameScalar(nullptr, @@ -390,9 +386,9 @@ void execReduceSame( void execReduceBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceBoolScalar(nullptr, opNum, @@ -414,9 +410,9 @@ void execReduceBool( void execReduceLong( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceLongScalar(nullptr, opNum, @@ -446,16 +442,15 @@ void execReduceLong( */ void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = 
static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -482,13 +477,13 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void execReduceBool2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); @@ -518,10 +513,10 @@ void execReduceBool2(Nd4jPointer *extraPointers, void execReduceSame2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto 
dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -554,16 +549,15 @@ void execReduceSame2(Nd4jPointer *extraPointers, void execReduceLong2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPack.primaryShapeInfo(); auto hTADOffsets = tadPack.primaryOffsets(); @@ -601,10 +595,10 @@ void execReduceLong2(Nd4jPointer *extraPointers, */ void execReduce3(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduce3(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParams, dbY->primary(), hYShapeInfo, 
dbY->special(), dYShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo); @@ -624,10 +618,10 @@ void execReduce3(Nd4jPointer *extraPointers, * @param hYShapeInfo */ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduce3Scalar(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParams, dbY->primary(), hYShapeInfo, dbY->special(), dYShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo); @@ -651,16 +645,16 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, */ void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, 
const Nd4jLong *yTadOffsets) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); if (extraPointers == nullptr || extraPointers[2] == 0) { NativeOpExecutioner::execReduce3(LaunchContext::defaultContext(), opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, @@ -704,9 +698,9 @@ bool isBlasVersionMatches(int major, int minor, int build) { void execScalar( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbScalar, const Nd4jLong *hScalarShapeInfo, const Nd4jLong *dScalarShapeInfo, void *extraParams) { try { NativeOpExecutioner::execScalar(nullptr, @@ -733,9 +727,9 @@ void execScalar( void execScalarBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbScalar, const Nd4jLong *hScalarShapeInfo, const Nd4jLong *dScalarShapeInfo, void *extraParams) { try { NativeOpExecutioner::execScalarBool(nullptr, @@ -768,9 +762,9 @@ void execScalarBool( */ void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const 
Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, bool biasCorrected) { try { NativeOpExecutioner::execSummaryStatsScalar(nullptr, @@ -801,9 +795,9 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, */ void execSummaryStats(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, bool biasCorrected) { try { NativeOpExecutioner::execSummaryStats(nullptr, @@ -836,12 +830,12 @@ void execSummaryStats(Nd4jPointer *extraPointers, */ void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -882,8 +876,8 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, void execTransformFloat( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, 
Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformFloat(nullptr, @@ -908,8 +902,8 @@ void execTransformFloat( void execTransformSame( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformSame(nullptr, @@ -934,8 +928,8 @@ void execTransformSame( void execTransformBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformBool(nullptr, @@ -960,8 +954,8 @@ void execTransformBool( void execTransformAny( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformAny(nullptr, @@ -986,8 +980,8 @@ void execTransformAny( void execTransformStrict( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong 
*dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformStrict(nullptr, @@ -1011,19 +1005,17 @@ void execTransformStrict( void execReduce3All(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yOffsets) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); NativeOpExecutioner::execReduce3All(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParamsVals, dbY->primary(), @@ -1046,7 +1038,7 @@ void specialConcat( Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *hZ, - Nd4jLong *hZShapeInfo, + Nd4jLong const* hZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { try { @@ -1227,7 +1219,7 @@ void setGridLimit(int gridSize) { // no-op } -sd::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensionLength) { +sd::TadPack* 
tadOnlyShapeInfo(Nd4jLong const* hXShapeInfo, int *dimension, int dimensionLength) { auto pack = new TadPack(); try { *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); @@ -1239,21 +1231,26 @@ sd::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensi return pack; } -Nd4jLong* getPrimaryShapeInfo(sd::TadPack* pack) { - return pack->primaryShapeInfo(); +Nd4jLong const* getPrimaryShapeInfo(sd::TadPack* pack) { + return const_cast(pack->primaryShapeInfo()); } -Nd4jLong* getPrimaryOffsets(sd::TadPack* pack) { - return pack->primaryOffsets(); + +Nd4jLong const* getPrimaryOffsets(sd::TadPack* pack) { + return const_cast(pack->primaryOffsets()); } -Nd4jLong* getSpecialShapeInfo(sd::TadPack* pack) { - return pack->specialShapeInfo(); + +Nd4jLong const* getSpecialShapeInfo(sd::TadPack* pack) { + return const_cast(pack->specialShapeInfo()); } -Nd4jLong* getSpecialOffsets(sd::TadPack* pack) { - return pack->specialOffsets(); + +Nd4jLong const* getSpecialOffsets(sd::TadPack* pack) { + return const_cast(pack->specialOffsets()); } + Nd4jLong getNumberOfTads(sd::TadPack* pack) { return pack->numberOfTads(); } + int getShapeInfoLength(sd::TadPack* pack) { return pack->shapeInfoLength(); } @@ -1270,15 +1267,15 @@ Nd4jPointer getConstantSpace() { template void pullRowsGeneric(void *vx, - Nd4jLong *hXShapeInfo, + Nd4jLong const* hXShapeInfo, void *vz, - Nd4jLong *hZShapeInfo, + Nd4jLong const* hZShapeInfo, const int n, - Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets) { + Nd4jLong const* indexes, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets) { auto hX = reinterpret_cast(vx); auto hZ = reinterpret_cast(vz); @@ -1322,14 +1319,14 @@ void pullRowsGeneric(void *vx, } void pullRows(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong 
*dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, Nd4jLong n, - Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets) { + Nd4jLong* indexes, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets) { try { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1342,11 +1339,11 @@ void pullRows(Nd4jPointer *extraPointers, template void tearGeneric(void *vx, - Nd4jLong *hXShapeInfo, + Nd4jLong const* hXShapeInfo, Nd4jPointer *targets, - Nd4jLong *hZShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + Nd4jLong const* hZShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { auto hX = reinterpret_cast(vx); @@ -1381,11 +1378,11 @@ void tearGeneric(void *vx, } void tear(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, Nd4jPointer *targets, - Nd4jLong *hZShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + Nd4jLong const* hZShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { try { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1398,10 +1395,10 @@ void tear(Nd4jPointer *extraPointers, void average(Nd4jPointer *extras, - Nd4jPointer *hX, Nd4jLong *hXShapeInfo, - Nd4jPointer *dX, Nd4jLong *dXShapeInfo, - void *z, Nd4jLong *hZShapeInfo, - void *dz, Nd4jLong *dZShapeInfo, + Nd4jPointer *hX, const Nd4jLong *hXShapeInfo, + Nd4jPointer *dX, const Nd4jLong *dXShapeInfo, + void *z, const Nd4jLong *hZShapeInfo, + void *dz, const Nd4jLong *dZShapeInfo, int n, Nd4jLong length, bool propagate) { @@ -1416,10 +1413,10 @@ void average(Nd4jPointer 
*extras, } void accumulate(Nd4jPointer *extras, - Nd4jPointer *hX, Nd4jLong *hXShapeInfo, - Nd4jPointer *dX, Nd4jLong *dXShapeInfo, - void *hz, Nd4jLong *hZShapeInfo, - void *dz, Nd4jLong *dZShapeInfo, + Nd4jPointer *hX, Nd4jLong const* hXShapeInfo, + Nd4jPointer *dX, Nd4jLong const* dXShapeInfo, + void *hz, Nd4jLong const* hZShapeInfo, + void *dz, Nd4jLong const* dZShapeInfo, int n, Nd4jLong length) { try { @@ -1436,6 +1433,28 @@ void enableP2P(bool enable) { // no-op } + + +void encodeThresholdP1(Nd4jPointer *extraPointers, void *hX, Nd4jLong const* hXShapeInfo, Nd4jLong N, int *dz, float threshold) { + // TODO: to be implemented +} + + +void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *hX, Nd4jLong N, int *dz) { + // TODO: to be implemented +} + + +void encodeThresholdP3(Nd4jPointer *extraPointers, void *hX, Nd4jLong const* hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ + // offsets won't be used here + + // TODO: to be implemented +} + +void decodeThreshold(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, const Nd4jLong *hZShapeInfo){ + // TODO: to be implemented +} + bool isP2PAvailable() { // always TRUE for cpu backend return true; @@ -1445,8 +1464,12 @@ void checkP2P() { // no-op } +void decodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong const* hZShapeInfo) { + NativeOpExecutioner::decodeBitmap(hX, N, dz, hZShapeInfo); +} + template -void shuffleGeneric(void **hX, Nd4jLong **hXShapeInfo, void **dz, Nd4jLong **hZShapeInfo, int N, int *shuffleMap, Nd4jLong **tadOnlyShapeInfo, Nd4jLong **tadOffsets) { +void shuffleGeneric(void **hX, Nd4jLong * const*hXShapeInfo, void **dz, Nd4jLong * const* hZShapeInfo, int N, int *shuffleMap, Nd4jLong * const* tadOnlyShapeInfo, Nd4jLong * const* tadOffsets) { auto dX = reinterpret_cast(hX); auto dZ = reinterpret_cast(dz); @@ -1517,10 +1540,10 @@ void shuffle(Nd4jPointer *extras, Nd4jPointer *tadShapeInfo, Nd4jPointer *tadOffsets) { try { - auto xShape = 
reinterpret_cast(hXShapeInfo); - auto zShape = reinterpret_cast(hZShapeInfo); - auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); - auto tadOffset = reinterpret_cast(tadOffsets); + auto xShape = reinterpret_cast(hXShapeInfo); + auto zShape = reinterpret_cast(hZShapeInfo); + auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); + auto tadOffset = reinterpret_cast(tadOffsets); auto xType = sd::ArrayOptions::dataType(xShape[0]); @@ -1548,13 +1571,13 @@ int getDevice() { void execScalarTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const*dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const*tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const*tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -1588,13 +1611,13 @@ void execScalarTad(Nd4jPointer *extraPointers, void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong 
*hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbScalars, const Nd4jLong *hScalarShapeInfo, const Nd4jLong *dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -1696,7 +1719,7 @@ void execAggregateBatch(Nd4jPointer *extraPointers, void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraArguments) { try { NativeOpExecutioner::execRandom(nullptr, opNum, state, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo, extraArguments); @@ -1709,9 +1732,9 @@ void execRandom(Nd4jPointer *extraPointers, void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraArguments) { try { NativeOpExecutioner::execRandom(nullptr, opNum, state, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, 
dbY->primary(), hYShapeInfo, dbY->special(), dYShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo, extraArguments); @@ -1724,8 +1747,8 @@ void execRandom3(Nd4jPointer *extraPointers, void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraArguments) { try { NativeOpExecutioner::execRandom(nullptr, opNum, state, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo, extraArguments); @@ -1793,8 +1816,8 @@ Nd4jPointer pointerForAddress(Nd4jLong address) { } void sort(Nd4jPointer *extraPointers, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void *hX, const Nd4jLong *hXShapeInfo, + void *dX, const Nd4jLong *dXShapeInfo, bool descending) { try { NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); @@ -1805,12 +1828,11 @@ void sort(Nd4jPointer *extraPointers, } void sortTad(Nd4jPointer *extraPointers, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + void *hX, const Nd4jLong *hXShapeInfo, + void *dX, const Nd4jLong *dXShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, bool descending) { try { NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); @@ -1833,6 +1855,12 @@ void sortCooIndices(Nd4jPointer *extraPointers, } } +Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong const* hXShapeInfo, Nd4jLong N, int *dz, float threshold) { + return NativeOpExecutioner::encodeBitmap(hX, 
hXShapeInfo, N, dz, threshold); +} + + + Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { auto hZ = new Nd4jLong[2];errno = 0; try { @@ -1916,7 +1944,7 @@ FORCEINLINE int estimateThresholdGeneric(Nd4jPointer *extraPointers, Nd4jPointer } -int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong *hXShapeInfo, int N, float threshold) { +int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong const* hXShapeInfo, int N, float threshold) { try { auto xType = ArrayOptions::dataType(hXShapeInfo); BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); @@ -1931,8 +1959,8 @@ Nd4jLong getShapeListSize(sd::ShapeList* list) { return list->size(); } -Nd4jLong* getShape(sd::ShapeList* list, Nd4jLong i) { - return list->at(i); +Nd4jLong const* getShape(sd::ShapeList* list, Nd4jLong i) { + return const_cast(list->at(i)); } void deleteShapeList(Nd4jPointer shapeList) { @@ -2226,8 +2254,8 @@ const char* getVariableName(sd::graph::Variable* variable) { return variable->getName()->c_str(); } -Nd4jLong* getVariableShape(sd::graph::Variable* variable) { - return variable->getNDArray()->shapeInfo(); +Nd4jLong const* getVariableShape(sd::graph::Variable* variable) { + return const_cast(variable->getNDArray()->shapeInfo()); } void* getVariableBuffer(sd::graph::Variable* variable) { @@ -2569,12 +2597,13 @@ void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { } template -static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* vIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo) { +static void _scatterUpdate( + Nd4jPointer *extraPointers, int opCode, int 
numOfSubArrs, + void* hX, const Nd4jLong* hXShapeInfo, const Nd4jLong* hXOffsets, + void* dX, const Nd4jLong* dXShapeInfo, const Nd4jLong* dXOffsets, + void* hY, const Nd4jLong* hYShapeInfo, const Nd4jLong* hYOffsets, + void* dY, const Nd4jLong* dYShapeInfo, const Nd4jLong* dYOffsets, + void* vIindexes, const Nd4jLong* hIndicesShapeInfo, void* dIindexes, const Nd4jLong* dIndicesShapeInfo) { auto hIindexes = reinterpret_cast(vIindexes); auto func = PRAGMA_THREADS_DO { @@ -2626,11 +2655,11 @@ static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSub //////////////////////////////////////////////////////////////////////// void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* hIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo) { + void* hX, const Nd4jLong* hXShapeInfo, const Nd4jLong* hXOffsets, + void* dX, const Nd4jLong* dXShapeInfo, const Nd4jLong* dXOffsets, + void* hY, const Nd4jLong* hYShapeInfo, const Nd4jLong* hYOffsets, + void* dY, const Nd4jLong* dYShapeInfo, const Nd4jLong* dYOffsets, + void* hIindexes, const Nd4jLong* hIndicesShapeInfo, void* dIindexes, const Nd4jLong* dIndicesShapeInfo) { auto iType = ArrayOptions::dataType(hIndicesShapeInfo); try { @@ -2686,7 +2715,7 @@ void deleteTadPack(sd::TadPack* ptr) { delete ptr; } -sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong *data, int length) { +sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, const Nd4jLong *data, int length) { return nullptr; } @@ -2847,7 +2876,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 
'f' : 'c', shape); } - return reinterpret_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); + return const_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); @@ -2856,10 +2885,10 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -2873,10 +2902,10 @@ void sortByKey(Nd4jPointer *extraPointers, } void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -2890,12 +2919,11 @@ void sortByValue(Nd4jPointer *extraPointers, } void sortTadByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - int *dimension, - int dimensionLength, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, + int *dimension, int dimensionLength, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -2909,12 +2937,11 @@ void 
sortTadByKey(Nd4jPointer *extraPointers, } void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - int *dimension, - int dimensionLength, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, + int *dimension, int dimensionLength, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -3195,8 +3222,8 @@ void dbClose(OpaqueDataBuffer *dataBuffer) { dataBuffer->getDataBuffer()->close(); } -BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); -BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void *, Nd4jLong*, Nd4jPointer*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); -BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong**, void**, Nd4jLong**, int, int*, Nd4jLong**, Nd4jLong**), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong const*, void*, Nd4jLong const*, const int, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void *, Nd4jLong const* , Nd4jPointer*, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong* const*, void**, Nd4jLong* const*, int, int*, Nd4jLong* const*, Nd4jLong* const*), LIBND4J_TYPES); diff --git a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu index 00a9ea03f..f01daffd7 100644 --- a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu +++ b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu @@ -87,12 +87,12 @@ extern "C" __global__ void prepareShapeBuffer(int *dimension, int 
*maxDimension, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams) { auto stream = lc->getCudaStream(); @@ -128,12 +128,12 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseBoolTransform( sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams) { auto stream = lc->getCudaStream(); @@ -164,12 +164,12 @@ void NativeOpExecutioner::execPairwiseBoolTransform( sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseIntTransform( sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* 
hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void * hZ, Nd4jLong const* hZShapeInfo, + void * dZ, Nd4jLong const* dZShapeInfo, void *extraParams) { auto stream = lc->getCudaStream(); @@ -200,11 +200,11 @@ void NativeOpExecutioner::execPairwiseIntTransform( sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, bool biasCorrected) { auto stream = lc->getCudaStream(); @@ -226,16 +226,16 @@ void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ 
-300,16 +300,16 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext* lc, const int opN void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void* hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -338,15 +338,15 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong 
const* tadOnlyShapeInfoZ,Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -413,15 +413,15 @@ void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext* lc, const int opNu void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ,Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -465,15 +465,15 @@ void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, */ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* 
tadOnlyShapeInfoZ,Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -536,15 +536,15 @@ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, const int opNum, void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ,Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -572,13 +572,13 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -607,13 +607,13 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, 
//////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension,int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -643,13 +643,13 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -689,13 +689,13 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, */ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, 
Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -734,13 +734,13 @@ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, */ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension,int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -774,11 +774,11 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo){ + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo){ if (sd::Environment::getInstance()->isDebug()) printf("F1 opNum:[%i]\n", opNum); @@ -825,11 +825,11 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, 
Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -854,11 +854,11 @@ void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -885,11 +885,11 @@ void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -916,11 +916,11 @@ void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + 
void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -947,12 +947,12 @@ void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); @@ -981,12 +981,12 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); @@ -1015,12 +1015,12 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void 
NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool allowParallelism) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1050,12 +1050,12 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); @@ -1084,12 +1084,12 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong 
const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -1118,11 +1118,11 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, bool biasCorrected) { auto stream = lc->getCudaStream(); @@ -1147,13 +1147,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -1178,13 +1178,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, 
Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -1215,16 +1215,16 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong* tadOnlyShapeInfo, Nd4jLong* tadOffsets, - Nd4jLong* yTadOnlyShapeInfo, Nd4jLong* yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { if(shape::isScalar(hZShapeInfo)) { NativeOpExecutioner::execReduce3(lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); @@ -1268,13 +1268,13 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void 
*extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); @@ -1308,12 +1308,12 @@ void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalar, Nd4jLong const* hScalarShapeInfo, + void const* dScalar, Nd4jLong const* dScalarShapeInfo, void *extraParams, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1344,16 +1344,16 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int 
*dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -1383,12 +1383,12 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalar, Nd4jLong const* hScalarShapeInfo, + void const* dScalar, Nd4jLong const* dScalarShapeInfo, void *extraParams, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1419,16 +1419,16 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong 
*tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -1458,12 +1458,12 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void* hZ, Nd4jLong const* hZShapeInfo, + void* dZ, Nd4jLong const* dZShapeInfo, + void const* hScalar, Nd4jLong const* hScalarShapeInfo, + void const* dScalar, Nd4jLong const* dScalarShapeInfo, void *extraParams, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1493,16 +1493,16 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + 
Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -1531,8 +1531,8 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, int opNum, Nd4jPointer stateHost, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraArguments) { auto stream = lc->getCudaStream(); @@ -1564,10 +1564,10 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, int opNum, Nd4jPointer stateHost, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraArguments) { auto stream = lc->getCudaStream(); @@ -1599,12 +1599,12 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, int opNum, Nd4jPointer stateHost, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraArguments) { auto stream = lc->getCudaStream(); @@ -1634,16 +1634,16 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, int opNum, - void *hX, 
Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets) { auto stream = lc->getCudaStream(); auto allocationPointer = lc->getAllocationPointer(); @@ -1676,16 +1676,16 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yTadOffsets) { if(shape::isScalar(hZShapeInfo)) { NativeOpExecutioner::execReduce3(lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, 
hZShapeInfo, dZ, dZShapeInfo); diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu index 0f2abca27..8be9b3bfd 100755 --- a/libnd4j/include/legacy/cuda/NativeOps.cu +++ b/libnd4j/include/legacy/cuda/NativeOps.cu @@ -123,8 +123,8 @@ int getDeviceSharedThreshold(int deviceId) { sd::buffer::Buffer * createScalarBuffer(cudaStream_t stream) { - Nd4jLong *scalarShapeInfo = shape::createScalarShapeInfo(); - sd::buffer::Buffer *buff = sd::buffer::createBuffer(scalarShapeInfo,shape::shapeInfoLength(2), stream); + auto scalarShapeInfo = shape::createScalarShapeInfo(); + auto buff = sd::buffer::createBuffer(scalarShapeInfo,shape::shapeInfoLength(2), stream); sd::buffer::copyDataToGpu(&buff, stream); return buff; } @@ -229,9 +229,9 @@ public: void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -251,9 +251,9 @@ void execPairwiseTransform( Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execPairwiseTransformBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + 
OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -275,9 +275,9 @@ void execPairwiseTransformBool(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -299,11 +299,11 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execBroadcastBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -348,10 +348,10 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void execBroadcast( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - 
OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -399,9 +399,9 @@ void execBroadcast( //////////////////////////////////////////////////////////////////////// void execReduceFloat(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -421,9 +421,9 @@ void execReduceFloat(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceSame(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -443,10 +443,10 @@ void execReduceSame(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void 
execReduceSame2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const*hDimensionShape, Nd4jLong const*dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -476,10 +476,10 @@ void execReduceSame2(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceLong2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const*hDimensionShape, Nd4jLong const*dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -509,9 +509,9 @@ void execReduceLong2(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceLong(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong 
const*hZShapeInfo, Nd4jLong const*dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -551,10 +551,10 @@ void execReduceLong(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceBool2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const*hDimensionShape, Nd4jLong const*dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -584,9 +584,9 @@ void execReduceBool2(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -637,10 +637,10 @@ void execReduceBool(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execIndexReduce(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer 
*dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -679,10 +679,10 @@ void execIndexReduce(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -720,9 +720,9 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void execIndexReduceScalar( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo){ + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo){ try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -741,8 +741,8 @@ void execIndexReduceScalar( //////////////////////////////////////////////////////////////////////// void execTransformSame(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - 
OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -766,8 +766,8 @@ void execTransformSame(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformBool(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -791,8 +791,8 @@ void execTransformBool(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformAny(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -817,8 +817,8 @@ void execTransformAny(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformStrict(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* 
hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -842,8 +842,8 @@ void execTransformStrict(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformFloat(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1368,7 +1368,7 @@ void specialConcat( Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *dZ, - Nd4jLong *dZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { + Nd4jLong const* dZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { try { BUILD_SINGLE_SELECTOR(ArrayOptions::dataType(dZShapeInfo), sd::SpecialMethods, ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, dZ, dZShapeInfo), @@ -1383,7 +1383,7 @@ void specialConcat( /** * This method saves */ -sd::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensionLength) { +sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* dXShapeInfo, int *dimension, int dimensionLength) { try { auto pack = new TadPack(); *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); @@ -1395,16 +1395,16 @@ sd::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensi } } -Nd4jLong* getPrimaryShapeInfo(sd::TadPack* pack) { +Nd4jLong const* getPrimaryShapeInfo(sd::TadPack* pack) { return pack->primaryShapeInfo(); } -Nd4jLong* getPrimaryOffsets(sd::TadPack* pack) { +Nd4jLong const* getPrimaryOffsets(sd::TadPack* pack) { return pack->primaryOffsets(); } -Nd4jLong* getSpecialShapeInfo(sd::TadPack* 
pack) { +Nd4jLong const* getSpecialShapeInfo(sd::TadPack* pack) { return pack->specialShapeInfo(); } -Nd4jLong* getSpecialOffsets(sd::TadPack* pack) { +Nd4jLong const* getSpecialOffsets(sd::TadPack* pack) { return pack->specialOffsets(); } Nd4jLong getNumberOfTads(sd::TadPack* pack) { @@ -1460,14 +1460,14 @@ Nd4jPointer getConstantSpace() { } void pullRows(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *zShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* zShapeInfo, Nd4jLong const* dZShapeInfo, Nd4jLong n, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets) { + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1489,10 +1489,10 @@ void pullRows(Nd4jPointer *extraPointers, void average(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dXShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dXShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length, bool propagate) { @@ -1524,10 +1524,10 @@ void average(Nd4jPointer *extras, } void accumulate(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dXShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dXShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length) { try { @@ -1572,8 +1572,8 @@ void shuffle(Nd4jPointer *extras, auto dX = 
reinterpret_cast(dx); auto dZ = reinterpret_cast(dz); - auto xShape = reinterpret_cast(xShapeInfo); - auto dxShape = reinterpret_cast(dXShapeInfo); + auto xShape = reinterpret_cast(xShapeInfo); + auto dxShape = reinterpret_cast(dXShapeInfo); auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); auto tadOffset = reinterpret_cast(tadOffsets); @@ -1614,9 +1614,9 @@ void setTADThreshold(int num) { //////////////////////////////////////////////////////////////////////// void execSummaryStats(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1638,12 +1638,12 @@ void execSummaryStats(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbDimension}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1670,10 +1670,10 @@ void execSummaryStatsTad(Nd4jPointer 
*extraPointers, //////////////////////////////////////////////////////////////////////// void execReduce3(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -1694,13 +1694,13 @@ void execReduce3(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1744,10 +1744,10 @@ void execReduce3Tad(Nd4jPointer *extraPointers, 
//////////////////////////////////////////////////////////////////////// void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -1768,9 +1768,9 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execScalarBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1792,13 +1792,13 @@ void execScalarBool(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + 
OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalars}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1825,9 +1825,9 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1849,13 +1849,13 @@ void execScalar(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execScalarTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, 
Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalars}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1931,7 +1931,7 @@ void execAggregateBatch(Nd4jPointer *extraPointers, void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraArguments) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {}); @@ -1950,8 +1950,8 @@ void execRandom(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraArguments) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1971,9 +1971,9 @@ void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, //////////////////////////////////////////////////////////////////////// void execRandom3(Nd4jPointer *extraPointers, int opNum, 
Nd4jPointer stateHost, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraArguments) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -2091,11 +2091,11 @@ Nd4jPointer pointerForAddress(Nd4jLong address) { } void tear(Nd4jPointer *extras, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dXShapeInfo, Nd4jPointer *targets, - Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + Nd4jLong const* zShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { try { InteropDataBuffer::prepareSpecialUse({}, {dbX}); @@ -2200,13 +2200,13 @@ void prescanArrayRecursive(Nd4jPointer *extras, int *dZ, int *dX, int numElement //////////////////////////////////////////////////////////////////////// void execReduce3All(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + 
OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY, dbDimension}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -2232,8 +2232,8 @@ void execReduce3All(Nd4jPointer *extraPointers, void sort(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, bool descending) { try { cudaStream_t *stream = reinterpret_cast(extraPointers[1]); @@ -2298,10 +2298,10 @@ void sort(Nd4jPointer *extraPointers, void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending) { try { auto stream = reinterpret_cast(extraPointers[1]); @@ -2372,10 +2372,10 @@ void sortByKey(Nd4jPointer *extraPointers, } void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending) { try { auto stream = reinterpret_cast(extraPointers[1]); @@ -2447,10 +2447,10 @@ void sortByValue(Nd4jPointer *extraPointers, void sortTadByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, 
Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending) { @@ -2474,10 +2474,10 @@ void sortTadByKey(Nd4jPointer *extraPointers, } void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending) { @@ -2503,12 +2503,12 @@ void sortTadByValue(Nd4jPointer *extraPointers, void sortTad(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, bool descending) { try { // to be implemented @@ -2653,7 +2653,7 @@ Nd4jLong getShapeListSize(sd::ShapeList* list) { return list->size(); } -Nd4jLong* getShape(sd::ShapeList* list, Nd4jLong i) { +Nd4jLong const* getShape(sd::ShapeList* list, Nd4jLong i) { return list->at(i); } @@ -2877,7 +2877,7 @@ const char* getVariableName(sd::graph::Variable* variable) { return variable->getName()->c_str(); } -Nd4jLong* getVariableShape(sd::graph::Variable* variable) { +Nd4jLong const* getVariableShape(sd::graph::Variable* variable) { return variable->getNDArray()->shapeInfo(); } @@ -3026,7 +3026,7 @@ void deleteResultWrapper(Nd4jPointer ptr) { delete p; } -int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong *dXShapeInfo, int N, float threshold) { +int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong const* dXShapeInfo, int N, float threshold) { throw std::runtime_error("estimateThreshold: Not implemented yet"); } @@ -3237,7 +3237,7 @@ void 
deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { /////////////////////////////////////////////////////////////////// template __global__ static void scatterUpdateCuda(const int opCode, const int numOfSubArrs, - void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, + void* vx, const Nd4jLong* xShapeInfo, const Nd4jLong *xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const void* vindexes) { @@ -3300,7 +3300,7 @@ __global__ static void scatterUpdateCuda(const int opCode, const int numOfSubArr } template -__host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const int opCode, const int numOfSubArrs, void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const void* indexes) { +__host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const int opCode, const int numOfSubArrs, void* vx, const Nd4jLong* xShapeInfo, const Nd4jLong* xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const void* indexes) { scatterUpdateCuda<<<512, 256, MAX_NUM_THREADS, *stream>>>(opCode, numOfSubArrs, vx, xShapeInfo, xOffsets, vy, yShapeInfo, yOffsets, indexes); } @@ -3308,11 +3308,11 @@ __host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const ////////////////////////////////////////////////////////////////////////// void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* hIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo) { + void* hX, Nd4jLong const* hXShapeInfo, Nd4jLong const* hXOffsets, + void* dX, Nd4jLong const* dXShapeInfo, Nd4jLong const* dXOffsets, + void* hY, Nd4jLong const* hYShapeInfo, 
Nd4jLong const* hYOffsets, + void* dY, Nd4jLong const* dYShapeInfo, Nd4jLong const* dYOffsets, + void* hIindexes, Nd4jLong const* hIndicesShapeInfo, void* dIindexes, Nd4jLong const* dIndicesShapeInfo) { try { auto stream = reinterpret_cast(extraPointers[1]); @@ -3409,7 +3409,7 @@ bool isBlasVersionMatches(int major, int minor, int build) { return result; } -sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong *data, int length) { +sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong const* data, int length) { return sd::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); } @@ -3555,8 +3555,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); } - return reinterpret_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, - true)); + return (Nd4jPointer)(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); // TO DO: this can lead to unpleasant crash sometimes } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); diff --git a/libnd4j/include/loops/BroadcastScalarConverter.h b/libnd4j/include/loops/BroadcastScalarConverter.h index 12006c293..f4d536f33 100644 --- a/libnd4j/include/loops/BroadcastScalarConverter.h +++ b/libnd4j/include/loops/BroadcastScalarConverter.h @@ -21,6 +21,7 @@ #define DEV_TESTS_BROADCASTSCALARCONVERTER_H #include +#include #include namespace sd { diff --git a/libnd4j/include/loops/broadcasting.h b/libnd4j/include/loops/broadcasting.h index 20c95588c..4f05f0c6e 100755 --- a/libnd4j/include/loops/broadcasting.h +++ b/libnd4j/include/loops/broadcasting.h @@ -56,18 +56,15 @@ namespace functions { class Broadcast { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ template - static __device__ void 
transformCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, @@ -75,67 +72,83 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo); - static __host__ void execBroadcast(dim3 
launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo); template - static __device__ void transformInverseCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformInverseCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int 
*dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else static void execInverse(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); static void exec(int opNum, - void *x, - Nd4jLong 
*xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop); + uint64_t start, uint64_t stop); /** * CPU execution @@ -149,39 +162,25 @@ namespace functions { * @param dimensionLength the length of the dimension buffer */ template - static void exec(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, + static void exec(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop); + uint64_t start, uint64_t stop); template - static void execInverse(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void execInverse(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong 
*tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); - static void exec(const int opNum, + static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); diff --git a/libnd4j/include/loops/broadcasting_bool.h b/libnd4j/include/loops/broadcasting_bool.h index 9bab82c81..400269c02 100644 --- a/libnd4j/include/loops/broadcasting_bool.h +++ b/libnd4j/include/loops/broadcasting_bool.h @@ -58,16 +58,13 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, @@ -76,7 +73,7 @@ namespace functions { void *extraParams); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *result, 
Nd4jLong const* resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ); template static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, @@ -85,7 +82,7 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo, void *extraParams); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *result, Nd4jLong const* resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ); static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, @@ -94,63 +91,61 @@ namespace functions { void *extraParams); template - static __device__ void transformInverseCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformInverseCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + 
const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong 
*tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); - static void exec(const int opNum, + static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo, void *extraParams); static void execInverse(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); /** * CPU execution @@ -164,21 +159,14 @@ namespace functions { * @param dimensionLength the length of the dimension buffer */ template - static void exec(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, + static void exec(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong 
*tadOffsetZ, + uint64_t start, uint64_t stop); template static void exec(const void *x, const Nd4jLong *xShapeInfo, @@ -187,21 +175,14 @@ namespace functions { void *extraParams); template - static void execInverse(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void execInverse(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/broadcasting_int.h b/libnd4j/include/loops/broadcasting_int.h index 81149ad8a..386fbd3f7 100644 --- a/libnd4j/include/loops/broadcasting_int.h +++ b/libnd4j/include/loops/broadcasting_int.h @@ -58,15 +58,12 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, @@ -74,7 +71,13 @@ namespace functions { void *z, const 
Nd4jLong *zShapeInfo); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, @@ -82,7 +85,14 @@ namespace functions { const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, @@ -90,59 +100,55 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static __device__ void transformInverseCuda( - void *x, - Nd4jLong *xShapeInfo, 
- void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformInverseCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + 
const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); - static void exec(const int opNum, + static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); static void execInverse(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); /** * CPU execution @@ -156,20 +162,13 @@ namespace functions { * @param dimensionLength the length of the dimension buffer */ template - static void exec(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, 
- uint64_t stop); + static void exec(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); template static void exec(const void *x, const Nd4jLong *xShapeInfo, @@ -177,20 +176,13 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static void execInverse(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void execInverse(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/cpu/broadcasting.hpp b/libnd4j/include/loops/cpu/broadcasting.hpp index 8de52cca7..c0f22313b 100644 --- a/libnd4j/include/loops/cpu/broadcasting.hpp +++ b/libnd4j/include/loops/cpu/broadcasting.hpp @@ -34,20 +34,13 @@ namespace broadcast { template void Broadcast::execInverse(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + 
const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TTT(execInverse, PARAMS(x, xShapeInfo, y, @@ -64,21 +57,14 @@ namespace broadcast { template void Broadcast::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + sd::LoopKind::Kind loopKind, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TTT(exec, PARAMS(x, xShapeInfo, y, @@ -96,24 +82,17 @@ namespace broadcast { template template - void Broadcast::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop) { + void Broadcast::exec(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + sd::LoopKind::Kind loopKind, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several 
sub tads after @@ -397,23 +376,16 @@ namespace broadcast { template template - void Broadcast::execInverse(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void Broadcast::execInverse(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after diff --git a/libnd4j/include/loops/cpu/broadcasting_bool.hpp b/libnd4j/include/loops/cpu/broadcasting_bool.hpp index 21b40cb55..18c8705e2 100644 --- a/libnd4j/include/loops/cpu/broadcasting_bool.hpp +++ b/libnd4j/include/loops/cpu/broadcasting_bool.hpp @@ -33,21 +33,14 @@ namespace broadcast { template void BroadcastBool::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, 
y, @@ -75,21 +68,14 @@ namespace broadcast { template void BroadcastBool::execInverse(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TT(execInverse, PARAMS(x, xShapeInfo, y, @@ -107,24 +93,17 @@ namespace broadcast { template template - void BroadcastBool::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastBool::exec(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -138,8 +117,8 @@ namespace broadcast { if (xTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, 
dimensionLength); - xTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(xTadShapeShapeInfo); @@ -279,24 +258,17 @@ namespace broadcast { template template - void BroadcastBool::execInverse(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, - int *dimension, - int dimensionLength, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastBool::execInverse(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + int *dimension, int dimensionLength, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -310,8 +282,8 @@ namespace broadcast { if (yTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); - yTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(yTadShapeShapeInfo); diff --git a/libnd4j/include/loops/cpu/broadcasting_int.hpp b/libnd4j/include/loops/cpu/broadcasting_int.hpp index 456994b16..7d0a995d6 100644 --- a/libnd4j/include/loops/cpu/broadcasting_int.hpp +++ 
b/libnd4j/include/loops/cpu/broadcasting_int.hpp @@ -33,20 +33,13 @@ namespace functions { template void BroadcastInt::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, y, @@ -72,20 +65,13 @@ namespace functions { template void BroadcastInt::execInverse(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_T(execInverse, PARAMS(x, xShapeInfo, y, @@ -102,23 +88,16 @@ namespace functions { template template - void BroadcastInt::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastInt::exec(const void *vx, const Nd4jLong 
*xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -131,8 +110,8 @@ namespace functions { if (xTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - xTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(xTadShapeShapeInfo); @@ -272,23 +251,16 @@ namespace functions { template template - void BroadcastInt::execInverse(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastInt::execInverse(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, const int dimensionLength, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -301,8 +273,8 @@ namespace functions { if (yTadShapeInfo == nullptr || 
tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); - yTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(yTadShapeShapeInfo); diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp index d4abd8c82..296fbcdef 100644 --- a/libnd4j/include/loops/cpu/indexreduce.hpp +++ b/libnd4j/include/loops/cpu/indexreduce.hpp @@ -33,27 +33,27 @@ namespace indexreduce { //////////////////////////////////////////////////////////////////////// template -Nd4jLong IndexReduce::execScalar( const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams) { +Nd4jLong IndexReduce::execScalar( const int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), INDEX_REDUCE_OPS); } //////////////////////////////////////////////////////////////////////// template void IndexReduce::exec(const int opNum, - void *x, Nd4jLong *xShapeInfo, - void *extraParams, - void *z, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); } //////////////////////////////////////////////////////////////////////// template template -Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { +Nd4jLong IndexReduce::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void 
*vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); //T startingVal = OpType::startingValue(x); @@ -107,13 +107,13 @@ Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vex //////////////////////////////////////////////////////////////////////// template template -void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { +void IndexReduce::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -136,7 +136,7 @@ void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, } auto tadOnlyShapeInfo = tadShapeInfo; - Nd4jLong *tadOffsets = tadOffset; + auto tadOffsets = tadOffset; if (tadOnlyShapeInfo == nullptr || tadOffsets == nullptr) { if (dimensionLength < 1) diff --git a/libnd4j/include/loops/cpu/pairwise.hpp b/libnd4j/include/loops/cpu/pairwise.hpp index 27c97efa9..45fe46e8f 100644 --- a/libnd4j/include/loops/cpu/pairwise.hpp +++ b/libnd4j/include/loops/cpu/pairwise.hpp @@ -34,18 +34,13 @@ namespace functions { namespace pairwise_transforms { template - void PairWiseTransform::exec( - const int opNum, - void *x, - Nd4jLong xEws, - void *y, - Nd4jLong yEws, - void *z, - Nd4jLong zEws, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseTransform::exec(const int opNum, + const void *x, Nd4jLong xEws, + const void *y, Nd4jLong yEws, + void *z, Nd4jLong zEws, + void *extraParams, + Nd4jLong n, + const uint64_t start,const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(exec, PARAMS(x, xEws, y, @@ 
-60,16 +55,16 @@ namespace functions { template template - void PairWiseTransform::exec(void *vx, Nd4jLong xEws, - void *vy, Nd4jLong yEws, + void PairWiseTransform::exec(const void *vx, Nd4jLong xEws, + const void *vy, Nd4jLong yEws, void *vz, Nd4jLong zEws, void *vextraParams, const Nd4jLong n, const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -86,17 +81,12 @@ namespace functions { } template - void PairWiseTransform::exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseTransform::exec(const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(exec, PARAMS(x, xShapeInfo, y, @@ -110,19 +100,14 @@ namespace functions { template template - void PairWiseTransform::exec( - void *vx, - Nd4jLong* xShapeInfo, - void *vy, - Nd4jLong* yShapeInfo, - void *vz, - Nd4jLong* zShapeInfo, - void *vextraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseTransform::exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vz, const Nd4jLong* zShapeInfo, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/pairwise_bool.cpp b/libnd4j/include/loops/cpu/pairwise_bool.cpp index d77413e8c..dfcdf6bfa 100644 --- 
a/libnd4j/include/loops/cpu/pairwise_bool.cpp +++ b/libnd4j/include/loops/cpu/pairwise_bool.cpp @@ -30,18 +30,13 @@ namespace functions { namespace pairwise_transforms { template - void PairWiseBoolTransform::exec( - const int opNum, - void *x, - Nd4jLong xEws, - void *y, - Nd4jLong yEws, - void *z, - Nd4jLong zEws, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const int opNum, + const void *x, Nd4jLong xEws, + const void *y, Nd4jLong yEws, + void *z, Nd4jLong zEws, + void *extraParams, + Nd4jLong n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xEws, y, @@ -56,19 +51,15 @@ namespace functions { template template - void PairWiseBoolTransform::exec(void *vx, - Nd4jLong xEws, - void *vy, - Nd4jLong yEws, - void *vz, - Nd4jLong zEws, - void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const void *vx, Nd4jLong xEws, + const void *vy, Nd4jLong yEws, + void *vz, Nd4jLong zEws, + void *vextraParams, + const Nd4jLong n, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -85,17 +76,12 @@ namespace functions { } template - void PairWiseBoolTransform::exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + const uint64_t start,const uint64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, y, @@ -109,15 +95,14 @@ namespace functions { 
template template - void PairWiseBoolTransform::exec(void *vx, Nd4jLong* xShapeInfo, - void *vy, Nd4jLong* yShapeInfo, - void *vz, Nd4jLong* zShapeInfo, - void *vextraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vz, const Nd4jLong* zShapeInfo, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/pairwise_int.cpp b/libnd4j/include/loops/cpu/pairwise_int.cpp index 9af092a0f..b82216611 100644 --- a/libnd4j/include/loops/cpu/pairwise_int.cpp +++ b/libnd4j/include/loops/cpu/pairwise_int.cpp @@ -30,18 +30,13 @@ namespace functions { namespace pairwise_transforms { template - void PairWiseIntTransform::exec( - const int opNum, - void *x, - Nd4jLong xEws, - void *y, - Nd4jLong yEws, - void *z, - Nd4jLong zEws, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseIntTransform::exec(const int opNum, + const void *x, Nd4jLong xEws, + const void *y, Nd4jLong yEws, + void *z, Nd4jLong zEws, + void *extraParams, + Nd4jLong n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xEws, y, @@ -56,19 +51,15 @@ namespace functions { template template - void PairWiseIntTransform::exec(void *vx, - Nd4jLong xEws, - void *vy, - Nd4jLong yEws, - void *vz, - Nd4jLong zEws, - void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop) { - - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + void PairWiseIntTransform::exec(const void *vx, Nd4jLong xEws, + const void *vy, Nd4jLong yEws, + void *vz, Nd4jLong zEws, + void *vextraParams, + const Nd4jLong n, + const uint64_t 
start, + const uint64_t stop) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -85,17 +76,12 @@ namespace functions { } template - void PairWiseIntTransform::exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseIntTransform::exec(const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, y, @@ -109,15 +95,15 @@ namespace functions { template template - void PairWiseIntTransform::exec(void *vx, Nd4jLong* xShapeInfo, - void *vy, Nd4jLong* yShapeInfo, - void *vz, Nd4jLong* zShapeInfo, - void *vextraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseIntTransform::exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vz, const Nd4jLong* zShapeInfo, + void *vextraParams, + const uint64_t start, + const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/random.hpp b/libnd4j/include/loops/cpu/random.hpp index 034179f07..ea1dc9e76 100644 --- a/libnd4j/include/loops/cpu/random.hpp +++ b/libnd4j/include/loops/cpu/random.hpp @@ -33,16 +33,13 @@ namespace functions { template template void RandomFunction::execTransform(Nd4jPointer state, - void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraArguments) { + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, 
const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -166,12 +163,10 @@ namespace functions { template template void RandomFunction::execTransform(Nd4jPointer state, - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraArguments) { - auto x = reinterpret_cast(vx); + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraArguments) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -227,7 +222,7 @@ namespace functions { template template - void RandomFunction::execTransform(Nd4jPointer state, void *vz, Nd4jLong *zShapeInfo, void *vextraArguments) { + void RandomFunction::execTransform(Nd4jPointer state, void *vz, const Nd4jLong *zShapeInfo, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -266,17 +261,17 @@ namespace functions { } template - void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraArguments) { + void RandomFunction::execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeInfo, void *z, const Nd4jLong *zShapeInfo, void *extraArguments) { DISPATCH_BY_OPNUM_T(execTransform, PARAMS(state, x, xShapeInfo, z, zShapeInfo, extraArguments), RANDOM_OPS) } template - void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraArguments) { + void RandomFunction::execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong 
*yShapeInfo, void *z, const Nd4jLong *zShapeInfo, void *extraArguments) { DISPATCH_BY_OPNUM_T(execTransform, PARAMS(state, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraArguments), RANDOM_OPS) } template - void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *z, Nd4jLong *zShapeInfo, void *extraArguments) { + void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *z, const Nd4jLong *zShapeInfo, void *extraArguments) { DISPATCH_BY_OPNUM_T(execTransform, PARAMS(state, z, zShapeInfo, extraArguments), RANDOM_OPS) } diff --git a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp index afb441a45..708f3c0d7 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp @@ -33,12 +33,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceBoolFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceBoolFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -78,9 +76,9 @@ namespace functions { template template - Z _CUDA_H ReduceBoolFunction::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { + Z _CUDA_H ReduceBoolFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -103,49 +101,39 @@ namespace functions { template Y ReduceBoolFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { 
RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_BOOL_OPS); } template void ReduceBoolFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_BOOL_OPS); } template void ReduceBoolFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset, start, stop), REDUCE_BOOL_OPS); } template template - void _CUDA_H ReduceBoolFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vresult, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceBoolFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vresult, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vresult); auto extraParams = reinterpret_cast(vextraParams); @@ -193,20 +181,17 @@ namespace functions { template template - void _CUDA_H ReduceBoolFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - 
void *vresult, - Nd4jLong *resultShapeInfo) { - // FIXME: wtf??? + void _CUDA_H ReduceBoolFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vresult, const Nd4jLong *resultShapeInfo) { auto z = reinterpret_cast(vresult); z[0] = execScalar(x, xShapeInfo, extraParams); } template template - Z _CUDA_H ReduceBoolFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + Z _CUDA_H ReduceBoolFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); Z intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp index 40c24f4fa..1795dbc3d 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp @@ -33,12 +33,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceFloatFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceFloatFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -98,8 +96,8 @@ namespace functions { template template - Z _CUDA_H ReduceFloatFunction::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + Z _CUDA_H ReduceFloatFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -122,33 
+120,27 @@ namespace functions { template Y ReduceFloatFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_FLOAT_OPS); } template void ReduceFloatFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_FLOAT_OPS); } template void ReduceFloatFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, @@ -163,17 +155,14 @@ namespace functions { template template - void _CUDA_H ReduceFloatFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vresult, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceFloatFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vresult, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vresult); auto extraParams = 
reinterpret_cast(vextraParams); @@ -226,11 +215,9 @@ namespace functions { template template - void _CUDA_H ReduceFloatFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vresult, - Nd4jLong *resultShapeInfo) { + void _CUDA_H ReduceFloatFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vresult, const Nd4jLong *resultShapeInfo) { // FIXME: wtf??? auto z = reinterpret_cast(vresult); z[0] = execScalar(x, xShapeInfo, extraParams); @@ -238,9 +225,9 @@ namespace functions { template template - Z _CUDA_H ReduceFloatFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + Z _CUDA_H ReduceFloatFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); Z intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp index 98b462ebd..c1fd4385c 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp @@ -33,12 +33,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceLongFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceLongFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -93,10 +91,8 @@ namespace functions { template template - Z _CUDA_H ReduceLongFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams) { - auto x = reinterpret_cast(vx); + Z _CUDA_H 
ReduceLongFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -120,49 +116,40 @@ namespace functions { template Y ReduceLongFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_LONG_OPS); } template void ReduceLongFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_LONG_OPS); } template void ReduceLongFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset, start, stop), REDUCE_LONG_OPS); } template template - void _CUDA_H ReduceLongFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vresult, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceLongFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vresult, const 
Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vresult); auto extraParams = reinterpret_cast(vextraParams); @@ -215,21 +202,18 @@ namespace functions { template template - void _CUDA_H ReduceLongFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vresult, - Nd4jLong *resultShapeInfo) { - // FIXME: wtf??? + void _CUDA_H ReduceLongFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vresult, const Nd4jLong *resultShapeInfo) { auto z = reinterpret_cast(vresult); z[0] = execScalar(x, xShapeInfo, extraParams); } template template - Z _CUDA_H ReduceLongFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + Z _CUDA_H ReduceLongFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); Z intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp index f357b7e64..2516767b6 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp @@ -34,12 +34,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceSameFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceSameFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = 
reinterpret_cast(vextraParams); @@ -95,10 +93,8 @@ namespace functions { template template - X _CUDA_H ReduceSameFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams) { - auto x = reinterpret_cast(vx); + X _CUDA_H ReduceSameFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -120,33 +116,27 @@ namespace functions { template X ReduceSameFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_T(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_SAME_OPS); } template void ReduceSameFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_T(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_SAME_OPS); } template void ReduceSameFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, extraParams, @@ -161,17 +151,14 @@ namespace functions { template template - void _CUDA_H ReduceSameFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong 
*tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceSameFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -224,21 +211,18 @@ namespace functions { template template - void _CUDA_H ReduceSameFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *zShapeInfo) { - // FIXME: wtf??? + void _CUDA_H ReduceSameFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo) { auto z = reinterpret_cast(vz); z[0] = execScalar(x, xShapeInfo, extraParams); } template template - X _CUDA_H ReduceSameFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + X _CUDA_H ReduceSameFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); X intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce3.hpp b/libnd4j/include/loops/cpu/reduce3.hpp index 961c6b1c8..3a830377e 100644 --- a/libnd4j/include/loops/cpu/reduce3.hpp +++ b/libnd4j/include/loops/cpu/reduce3.hpp @@ -34,13 +34,13 @@ namespace reduce3 { ////////////////////////////////////////////////////////////////////////// template template -void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo) { +void Reduce3::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong 
*yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -134,10 +134,10 @@ void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template void Reduce3::execScalar(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo) { + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(vx, xShapeInfo, extraParamsVals, vy, yShapeInfo, vz, zShapeInfo), REDUCE3_OPS); } @@ -146,14 +146,15 @@ void Reduce3::execScalar(const int opNum, ////////////////////////////////////////////////////////////////////////// template template -void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, int64_t start, int64_t stop) { +void Reduce3::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -171,15 +172,16 @@ void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, +void Reduce3::exec(const void *vx, const Nd4jLong *xShapeInfo, void 
*vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); #ifdef INLINE_LOOPS @@ -193,16 +195,17 @@ void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -void Reduce3:: execAll(void *vx, Nd4jLong *xShapeInfo, +void Reduce3:: execAll(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop) { + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -215,12 +218,13 @@ void Reduce3:: execAll(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template -void Reduce3::exec( const int opNum, - void *vx, Nd4jLong *xShapeInfo, +void Reduce3::exec(const int opNum, + const void *vx, const Nd4jLong *xShapeInfo, void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong 
*zShapeInfo, - int *dimension, int dimensionLength, int64_t start, int64_t stop) { + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(vx, xShapeInfo, extraParamsVals, vy, yShapeInfo, vz, zShapeInfo, dimension, dimensionLength, start, stop), REDUCE3_OPS); } @@ -228,13 +232,14 @@ void Reduce3::exec( const int opNum, ////////////////////////////////////////////////////////////////////////// template -void Reduce3::exec( const int opNum, - void *vx, Nd4jLong *xShapeInfo, +void Reduce3::exec(const int opNum, + const void *vx, const Nd4jLong *xShapeInfo, void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(vx,xShapeInfo,extraParamsVals,vy, yShapeInfo,vz,zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, start, stop), REDUCE3_OPS); } @@ -243,13 +248,14 @@ void Reduce3::exec( const int opNum, ////////////////////////////////////////////////////////////////////////// template void Reduce3::execAll(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop) { + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const 
Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(execAll, PARAMS(vx, xShapeInfo, extraParamsVals, vy, yShapeInfo, vz, zShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets, start, stop), REDUCE3_OPS); } diff --git a/libnd4j/include/loops/cpu/scalar.hpp b/libnd4j/include/loops/cpu/scalar.hpp index d93db7c8f..236ba7e25 100644 --- a/libnd4j/include/loops/cpu/scalar.hpp +++ b/libnd4j/include/loops/cpu/scalar.hpp @@ -34,18 +34,18 @@ namespace scalar { //////////////////////////////////////////////////////////////////////// template template -void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { +void ScalarTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); auto extraParams = reinterpret_cast(vextraParams); if (zTadShapeInfo == nullptr) { @@ -92,14 +92,14 @@ void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template void ScalarTransform::transform(int opNum, - void *x, Nd4jLong *xShapeInfo, - void *extraParams, - void *z, Nd4jLong *zShapeInfo, - void *scalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong 
*xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(transform, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, xTadShapeInfo, xTadOffsets, zTadShapeInfo, zTadOffsets, start, stop), SCALAR_OPS); } @@ -107,12 +107,12 @@ void ScalarTransform::transform(int opNum, //////////////////////////////////////////////////////////////////////// template void ScalarTransform::transform(const int opNum, - void *x, Nd4jLong xStride, - void *z, Nd4jLong zStride, - void *scalar, - void *extraParams, - const uint64_t n, - const uint64_t start, const uint64_t stop) { + const void *x, Nd4jLong xStride, + void *z, Nd4jLong zStride, + const void *scalar, + void *extraParams, + const uint64_t n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(transform, PARAMS(x, xStride, z, zStride, scalar, extraParams, n, start, stop), SCALAR_OPS); } @@ -120,11 +120,11 @@ void ScalarTransform::transform(const int opNum, //////////////////////////////////////////////////////////////////////// template void ScalarTransform::transform(const int opNum, - void *x, Nd4jLong *xShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *scalar, - void *extraParams, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalar, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(transform, PARAMS(x, xShapeInfo, z, zShapeInfo, scalar, extraParams, start, stop), SCALAR_OPS); } @@ -132,15 +132,15 @@ void 
ScalarTransform::transform(const int opNum, //////////////////////////////////////////////////////////////////////// template template -void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void *vscalar, - void *vextraParams, - const uint64_t start, const uint64_t stop) { +void ScalarTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalar, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); const auto len = shape::length(xShapeInfo); @@ -181,15 +181,15 @@ void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template template -void ScalarTransform::transform(void *vx, Nd4jLong xEws, - void *vz, Nd4jLong zEws, - void *vscalar, - void *vextraParams, - const uint64_t len, const uint64_t start, const uint64_t stop) { +void ScalarTransform::transform(const void *vx, Nd4jLong xEws, + void *vz, Nd4jLong zEws, + const void *vscalar, + void *vextraParams, + const uint64_t len, const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); if (xEws == 1 && zEws == 1) { diff --git a/libnd4j/include/loops/cpu/scalar_bool.cpp b/libnd4j/include/loops/cpu/scalar_bool.cpp index c6f437ba8..72513c10d 100644 --- a/libnd4j/include/loops/cpu/scalar_bool.cpp +++ b/libnd4j/include/loops/cpu/scalar_bool.cpp @@ -34,18 +34,18 @@ namespace functions { template template - void ScalarBoolTransform::transform(void *vx, Nd4jLong 
*xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { + void ScalarBoolTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); auto extraParams = reinterpret_cast(vextraParams); if (zTadShapeInfo == nullptr) { @@ -92,60 +92,50 @@ namespace functions { template void ScalarBoolTransform::transform(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - void *scalars, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets, const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, xTadShapeInfo, xTadOffsets, zTadShapeInfo, zTadOffsets, start, stop), SCALAR_BOOL_OPS); } template void ScalarBoolTransform::transform(const int opNum, - void *x, - Nd4jLong xEws, - void *z, - Nd4jLong zEws, - void *scalar, - 
void *extraParams, - const uint64_t n, - const uint64_t start, const uint64_t stop) { + const void *x, Nd4jLong xEws, + void *z, Nd4jLong zEws, + const void *scalar, + void *extraParams, + const uint64_t n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xEws, z, zEws, scalar, extraParams, n, start, stop), SCALAR_BOOL_OPS); } template void ScalarBoolTransform::transform(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *scalar, - void *extraParams, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalar, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, z, zShapeInfo, scalar, extraParams, start, stop), SCALAR_BOOL_OPS); } template template - void ScalarBoolTransform::transform(void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vscalar, - void *vextraParams, - const uint64_t start, const uint64_t stop) { + void ScalarBoolTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalar, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -185,18 +175,16 @@ namespace functions { template template - void ScalarBoolTransform::transform(void *vx, - Nd4jLong xEws, - void *vz, - Nd4jLong zEws, - void *vscalar, - void *vextraParams, - const uint64_t len, - const uint64_t start, const uint64_t stop) { + void ScalarBoolTransform::transform(const void *vx, Nd4jLong xEws, + void *vz, Nd4jLong zEws, + const void *vscalar, + void 
*vextraParams, + const uint64_t len, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); if (xEws == 1 && zEws == 1) { diff --git a/libnd4j/include/loops/cpu/scalar_int.cpp b/libnd4j/include/loops/cpu/scalar_int.cpp index ed85e28ef..1a8f5bcca 100644 --- a/libnd4j/include/loops/cpu/scalar_int.cpp +++ b/libnd4j/include/loops/cpu/scalar_int.cpp @@ -34,18 +34,18 @@ namespace functions { template template - void ScalarIntTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { + void ScalarIntTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); auto extraParams = reinterpret_cast(vextraParams); if (zTadShapeInfo == nullptr) { @@ -92,19 +92,14 @@ namespace functions { template void ScalarIntTransform::transform(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - void *scalars, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets, - const uint64_t start, const 
uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, xTadShapeInfo, xTadOffsets, zTadShapeInfo, zTadOffsets, start, stop), SCALAR_INT_OPS); } @@ -112,42 +107,35 @@ namespace functions { template void ScalarIntTransform::transform(const int opNum, - void *x, - Nd4jLong xEws, - void *z, - Nd4jLong zEws, - void *scalar, - void *extraParams, - const uint64_t n, - const uint64_t start, const uint64_t stop) { + const void *x, Nd4jLong xEws, + void *z, Nd4jLong zEws, + const void *scalar, + void *extraParams, + const uint64_t n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xEws, z, zEws, scalar, extraParams, n, start, stop), SCALAR_INT_OPS); } template void ScalarIntTransform::transform(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *scalar, - void *extraParams, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalar, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xShapeInfo, z, zShapeInfo, scalar, extraParams, start, stop), SCALAR_INT_OPS); } template template - void ScalarIntTransform::transform(void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vscalar, - void *vextraParams, - const uint64_t start, const uint64_t stop) { + void ScalarIntTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalar, void *vextraParams, + 
const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -187,18 +175,15 @@ namespace functions { template template - void ScalarIntTransform::transform(void *vx, - Nd4jLong xEws, - void *vz, - Nd4jLong zEws, - void *vscalar, - void *vextraParams, - const uint64_t len, - const uint64_t start, const uint64_t stop) { + void ScalarIntTransform::transform(const void *vx, Nd4jLong xEws, + void *vz, Nd4jLong zEws, + const void *vscalar, + void *vextraParams, + const uint64_t len, const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); if (scalar < (sizeof(X) * 8)) { diff --git a/libnd4j/include/loops/cpu/summarystatsreduce.cpp b/libnd4j/include/loops/cpu/summarystatsreduce.cpp index f6b44b75c..2d53671d2 100644 --- a/libnd4j/include/loops/cpu/summarystatsreduce.cpp +++ b/libnd4j/include/loops/cpu/summarystatsreduce.cpp @@ -34,54 +34,46 @@ namespace functions { template Y SummaryStatsReduce::execScalar(const int opNum, - const bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(biasCorrected, x, xShapeInfo, extraParams), SUMMARY_STATS_OPS); } template void SummaryStatsReduce::execScalar(const int opNum, - const bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void 
*extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(biasCorrected, x, xShapeInfo, extraParams, z, zShapeInfo), SUMMARY_STATS_OPS); } template void SummaryStatsReduce::exec(const int opNum, - const bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength) { + const bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(biasCorrected, x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength), SUMMARY_STATS_OPS); } template template void SummaryStatsReduce::execScalar(const bool biasCorrected, - void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { + const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { auto z = reinterpret_cast(vz); z[0] = execScalar(biasCorrected, vx, xShapeInfo, vextraParams); } template template - Z SummaryStatsReduce::execScalar(const bool biasCorrected, void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { + Z SummaryStatsReduce::execScalar(const bool biasCorrected, const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); SummaryStatsData startingIndex; @@ -105,15 +97,12 @@ namespace functions { template template void SummaryStatsReduce::exec(const bool biasCorrected, - void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength) { + const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = 
reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto resultLength = shape::length(zShapeInfo); diff --git a/libnd4j/include/loops/cpu/transform/transform_any.cpp b/libnd4j/include/loops/cpu/transform/transform_any.cpp index 3fc9af1b3..6a8c07094 100644 --- a/libnd4j/include/loops/cpu/transform/transform_any.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_any.cpp @@ -30,25 +30,23 @@ namespace functions { namespace transform { template - void TransformAny::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t threadId, uint64_t numThreads) { + void TransformAny::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_ANY_OPS); } ///////////////////////////////////////////////////////////////////// template template -void _CUDA_H TransformAny::exec(void *vx, Nd4jLong *xShapeInfo, - void *vz,Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { +void _CUDA_H TransformAny::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_bool.cpp b/libnd4j/include/loops/cpu/transform/transform_bool.cpp index 7302ef970..5e88a15c3 100644 --- a/libnd4j/include/loops/cpu/transform/transform_bool.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_bool.cpp @@ -30,27 +30,22 @@ namespace functions { namespace transform { template - void TransformBool::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, 
- void *extraParams, - uint64_t threadId, uint64_t numThreads) { + void TransformBool::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_BOOL_OPS); } template template - void _CUDA_H TransformBool::exec( - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { + void _CUDA_H TransformBool::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_float.cpp b/libnd4j/include/loops/cpu/transform/transform_float.cpp index 833b263f1..fd37391c2 100644 --- a/libnd4j/include/loops/cpu/transform/transform_float.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_float.cpp @@ -29,27 +29,22 @@ using namespace simdOps; namespace functions { namespace transform { template - void TransformFloat::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t threadId, uint64_t numThreads) { + void TransformFloat::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_FLOAT_OPS); } template template - void _CUDA_H TransformFloat::exec( - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { + void _CUDA_H 
TransformFloat::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_same.cpp b/libnd4j/include/loops/cpu/transform/transform_same.cpp index bc9d2e525..d2793d9c0 100644 --- a/libnd4j/include/loops/cpu/transform/transform_same.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_same.cpp @@ -30,24 +30,22 @@ namespace functions { namespace transform { template - void TransformSame::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, uint64_t threadId, uint64_t numThreads) { + void TransformSame::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_SAME_OPS); } template template - void _CUDA_H TransformSame::exec(void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void _CUDA_H TransformSame::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, void *vextraParams, uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_strict.cpp b/libnd4j/include/loops/cpu/transform/transform_strict.cpp index 2ef3b808e..54a24d0e3 100644 --- a/libnd4j/include/loops/cpu/transform/transform_strict.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_strict.cpp @@ -30,26 +30,23 @@ namespace functions { namespace transform { template - void TransformStrict::exec( - 
int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, uint64_t threadId, uint64_t numThreads) { + void TransformStrict::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, + const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_STRICT_OPS); } template template - void _CUDA_H TransformStrict::exec( - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { + void _CUDA_H TransformStrict::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cuda/broadcasting.chpp b/libnd4j/include/loops/cuda/broadcasting.chpp index 848522a35..4b5c7833f 100644 --- a/libnd4j/include/loops/cuda/broadcasting.chpp +++ b/libnd4j/include/loops/cuda/broadcasting.chpp @@ -34,22 +34,22 @@ using namespace simdOps; template static __global__ void broadcastSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::Broadcast::template 
transformCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } template -static __global__ void broadcastSimple(const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo ) { +static __global__ void broadcastSimple(const void const* x, const Nd4jLong const* xShapeInfo, + const void const* y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo ) { functions::broadcast::Broadcast::template transformCuda(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo); } @@ -57,14 +57,14 @@ static __global__ void broadcastSimple(const void *x, const Nd4jLong *xShapeInfo template static __global__ void broadcastInverseSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::Broadcast::template transformInverseCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } @@ -73,17 +73,17 @@ static __global__ void broadcastInverseSimple( namespace functions { namespace broadcast { - static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, Nd4jLong *shapeInfo) { + static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, const Nd4jLong *shapeInfo) { return shape::getIndexOffset(index, shapeInfo); } - static Nd4jLong __device__ __noinline__ length(Nd4jLong *shapeInfo) { + static Nd4jLong __device__ __noinline__ length(const Nd4jLong *shapeInfo) 
{ return shape::length(shapeInfo); } template template - __host__ void Broadcast::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void* z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } @@ -94,14 +94,14 @@ namespace functions { } template - __host__ void Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TTT(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_OPS)) DEBUG_KERNEL(stream, opNum); } template - __host__ void Broadcast::execBroadcast(dim3 launchDims, 
cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo) { + __host__ void Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong const* zShapeInfo) { DISPATCH_BY_OPNUM_TTT(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo), OPS_A(BROADCAST_OPS)) DEBUG_KERNEL(stream, opNum); @@ -109,12 +109,12 @@ namespace functions { template template - __host__ void Broadcast::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastInverseSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } template - __host__ void Broadcast::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, 
Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TTT(intermediateInverseBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_OPS)) DEBUG_KERNEL(stream, opNum); @@ -123,19 +123,19 @@ namespace functions { template template __device__ void Broadcast::transformInverseCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void* vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -189,19 +189,19 @@ namespace functions { template template __device__ void Broadcast::transformCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, 
Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after diff --git a/libnd4j/include/loops/cuda/broadcasting_bool.cu b/libnd4j/include/loops/cuda/broadcasting_bool.cu index 1c7bc358e..bed00a20f 100644 --- a/libnd4j/include/loops/cuda/broadcasting_bool.cu +++ b/libnd4j/include/loops/cuda/broadcasting_bool.cu @@ -34,24 +34,24 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastBoolSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastBool::template transformCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo, extraParams, dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } ////////////////////////////////////////////////////////////////////////// template -static __global__ void broadcastBoolSimple(const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo, +static __global__ void broadcastBoolSimple(const void const* x, const Nd4jLong const* xShapeInfo, + const void const* y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo, void *extraParams) { 
functions::broadcast::BroadcastBool::template transformCuda(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams); @@ -59,15 +59,15 @@ static __global__ void broadcastBoolSimple(const void *x, const Nd4jLong *xShape ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastBoolInverseSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastBool::template transformInverseCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,extraParams,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } @@ -78,7 +78,7 @@ namespace broadcast { ////////////////////////////////////////////////////////////////////////// template template -__host__ void BroadcastBool::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastBool::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void* z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { 
broadcastBoolSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "intermediateBroadcastBool(...) failed"); } @@ -98,7 +98,7 @@ __host__ void BroadcastBool::intermediateBroadcast(dim3 launchDims, cudaStr ////////////////////////////////////////////////////////////////////////// template -__host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TT(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_BOOL_OPS)) DEBUG_KERNEL(stream, opNum); @@ -119,14 +119,14 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s ////////////////////////////////////////////////////////////////////////// template template - __host__ void BroadcastBool::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) 
{ + __host__ void BroadcastBool::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastBoolInverseSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "intermediateBroadcastBool(...) failed"); } ////////////////////////////////////////////////////////////////////////// template - __host__ void BroadcastBool::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void BroadcastBool::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TT(intermediateInverseBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_BOOL_OPS)) DEBUG_KERNEL(stream, opNum); @@ -136,20 +136,20 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s template template __device__ void BroadcastBool::transformInverseCuda( - void *vx, Nd4jLong 
*xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -198,20 +198,20 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s template template __device__ void BroadcastBool::transformCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -235,7 +235,7 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s __syncthreads(); __shared__ Z *rZ; - __shared__ X *rX; + __shared__ X const* 
rX; for (int r = blockIdx.x; r < numTads; r += gridDim.x) { diff --git a/libnd4j/include/loops/cuda/broadcasting_int.cu b/libnd4j/include/loops/cuda/broadcasting_int.cu index 998ac9ae8..37cbf3eba 100644 --- a/libnd4j/include/loops/cuda/broadcasting_int.cu +++ b/libnd4j/include/loops/cuda/broadcasting_int.cu @@ -34,23 +34,23 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastIntSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastInt::template transformCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } ////////////////////////////////////////////////////////////////////////// template -static __global__ void broadcastIntSimple(const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo) { +static __global__ void broadcastIntSimple(const void *x, const Nd4jLong const* xShapeInfo, + const void *y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo) { functions::broadcast::BroadcastInt::template transformCuda(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo); } @@ -58,14 +58,14 @@ static __global__ void broadcastIntSimple(const void *x, const Nd4jLong *xShapeI ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastBoolInverseSimple( - void *x, - Nd4jLong 
*xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastInt::template transformInverseCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } @@ -75,7 +75,7 @@ namespace broadcast { ////////////////////////////////////////////////////////////////////////// template template -__host__ void BroadcastInt::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastInt::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastIntSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } @@ -92,16 +92,16 @@ __host__ void BroadcastInt::intermediateBroadcast(dim3 launchDims, cudaStream ////////////////////////////////////////////////////////////////////////// template -__host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void 
*y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_T(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_INT_OPS)) } ////////////////////////////////////////////////////////////////////////// template __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, - const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong const* xShapeInfo, + const void *y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo) { DISPATCH_BY_OPNUM_T(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo), OPS_A(BROADCAST_INT_OPS)) } @@ -109,13 +109,13 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre ////////////////////////////////////////////////////////////////////////// template template - __host__ void BroadcastInt::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void 
BroadcastInt::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastBoolInverseSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } ////////////////////////////////////////////////////////////////////////// template - __host__ void BroadcastInt::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void BroadcastInt::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_T(intermediateInverseBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_INT_OPS)) } @@ -123,19 +123,19 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre template template __device__ void BroadcastInt::transformInverseCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int 
*dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -183,19 +183,19 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre template template __device__ void BroadcastInt::transformCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -218,7 +218,7 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre __syncthreads(); __shared__ X *rZ; - __shared__ X *rX; + __shared__ X const* rX; for (int r = blockIdx.x; r < numTads; r += gridDim.x) { @@ -250,9 +250,9 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre ////////////////////////////////////////////////////////////////////////// template template -__device__ void 
BroadcastInt::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, - const void *vy, const Nd4jLong *yShapeInfo, - void *vz, const Nd4jLong *zShapeInfo) { +__device__ void BroadcastInt::transformCuda(const void *vx, const Nd4jLong const* xShapeInfo, + const void *vy, const Nd4jLong const* yShapeInfo, + void *vz, const Nd4jLong const* zShapeInfo) { const X* x = reinterpret_cast(vx); const X* y = reinterpret_cast(vy); diff --git a/libnd4j/include/loops/cuda/indexreduce.cu b/libnd4j/include/loops/cuda/indexreduce.cu index 6383458c9..e6a52b16a 100644 --- a/libnd4j/include/loops/cuda/indexreduce.cu +++ b/libnd4j/include/loops/cuda/indexreduce.cu @@ -31,14 +31,14 @@ using namespace simdOps; template static __global__ void simpleIndexReduceGeneric(const int op, - void *dx, - Nd4jLong *xShapeInfo, int xRank, + void const* dx, + Nd4jLong const* xShapeInfo, int xRank, void *extraParams, void *result, - Nd4jLong *zShapeInfo, int zRank, + Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, - int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { functions::indexreduce::IndexReduce::transform(op,dx,xShapeInfo,extraParams,result,zShapeInfo,dimension,dimensionLength,postProcessOrNot,allocationBuffer,reductionBuffer,tadOnlyShapeInfo,tadOffsets); } @@ -49,15 +49,15 @@ namespace functions { template _CUDA_H void IndexReduce::executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int opNum, - void *dx, Nd4jLong *xShapeInfo, + void const* dx, Nd4jLong const* xShapeInfo, int xRank, void *extraParams, - void *result, Nd4jLong *zShapeInfo, + void *result, Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { 
+ Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { simpleIndexReduceGeneric<<>>(opNum, dx, xShapeInfo, xRank, @@ -70,7 +70,7 @@ namespace functions { } template - _CUDA_H void IndexReduce::executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int opNum, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void IndexReduce::executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int opNum, void const* dx, Nd4jLong const* xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { simpleIndexReduceGeneric<<>>( opNum, dx, @@ -154,35 +154,35 @@ namespace functions { template __device__ void IndexReduce::transform( const int opNum, - void *x, - Nd4jLong *xShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, void *extraParams, void *result, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset) { + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffset) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, extraParams, result, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationBuffer, reductionBuffer, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); } template template - __device__ void IndexReduce::transform(void *vdx, Nd4jLong *xShapeInfo, + __device__ void IndexReduce::transform(void const* vdx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void* vz, Nd4jLong const* zShapeInfo, int *dimension, int 
dimensionLength, int postProcessOrNot, int *allocationBuffer, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets){ + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets){ /**int * Gpu information for the problem */ - auto dx = reinterpret_cast(vdx); + auto dx = reinterpret_cast(vdx); auto z = reinterpret_cast(vz); auto extraParams = static_cast(vextraParams); auto reductionBuffer = static_cast(vreductionBuffer); diff --git a/libnd4j/include/loops/cuda/pairwise.chpp b/libnd4j/include/loops/cuda/pairwise.chpp index d3252d862..ee2c01695 100644 --- a/libnd4j/include/loops/cuda/pairwise.chpp +++ b/libnd4j/include/loops/cuda/pairwise.chpp @@ -28,13 +28,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, +__global__ static void pairwiseSimpleShaped(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -91,9 +91,9 @@ namespace pairwise_transforms { template template void __host__ PairWiseTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams){ pairwiseSimpleShaped<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams); @@ -101,7 +101,7 @@ void __host__ PairWiseTransform::intermediateShaped(dim3& launchDims, cud 
//////////////////////////////////////////////////////////////////////////////// template -void __host__ PairWiseTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) { +void __host__ PairWiseTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void const* vy, Nd4jLong const* yShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void* vextraParams) { DISPATCH_BY_OPNUM_TTT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_TRANSFORM_OPS); } diff --git a/libnd4j/include/loops/cuda/pairwise_bool.cu b/libnd4j/include/loops/cuda/pairwise_bool.cu index f697de814..29cc90f2c 100644 --- a/libnd4j/include/loops/cuda/pairwise_bool.cu +++ b/libnd4j/include/loops/cuda/pairwise_bool.cu @@ -28,13 +28,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, +__global__ static void pairwiseSimpleShaped(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -92,9 +92,9 @@ namespace pairwise_transforms { template template void _CUDA_H PairWiseBoolTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* 
yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams){ pairwiseSimpleShaped<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams); @@ -103,7 +103,7 @@ void _CUDA_H PairWiseBoolTransform::intermediateShaped(dim3& launchDims, cu //////////////////////////////////////////////////////////////////////////////// template -void PairWiseBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) { +void PairWiseBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void const* vy, Nd4jLong const* yShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { auto xType = sd::DataTypeUtils::fromT(); auto yType = sd::DataTypeUtils::fromT(); diff --git a/libnd4j/include/loops/cuda/pairwise_int.cu b/libnd4j/include/loops/cuda/pairwise_int.cu index 44447605e..740995cee 100644 --- a/libnd4j/include/loops/cuda/pairwise_int.cu +++ b/libnd4j/include/loops/cuda/pairwise_int.cu @@ -28,13 +28,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, +__global__ static void pairwiseSimpleShaped(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -92,9 +92,9 @@ namespace pairwise_transforms { template template void _CUDA_H PairWiseIntTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong 
*xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams){ pairwiseSimpleShaped<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams); @@ -103,7 +103,7 @@ void _CUDA_H PairWiseIntTransform::intermediateShaped(dim3& launchDims, cudaS //////////////////////////////////////////////////////////////////////////////// template -void PairWiseIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) { +void PairWiseIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void const* vy, Nd4jLong const* yShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { auto xType = sd::DataTypeUtils::fromT(); DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_INT_OPS); diff --git a/libnd4j/include/loops/cuda/random.cu b/libnd4j/include/loops/cuda/random.cu index c7550b926..755763293 100644 --- a/libnd4j/include/loops/cuda/random.cu +++ b/libnd4j/include/loops/cuda/random.cu @@ -32,7 +32,7 @@ template static inline __device__ void randomSingleGeneric( Nd4jPointer state, void *z, - Nd4jLong *zShapeBuffer, + Nd4jLong const* zShapeBuffer, void *extraArguments) { @@ -46,10 +46,10 @@ static inline __device__ void randomSingleGeneric( template static inline __device__ void randomDoubleGeneric( Nd4jPointer state, - void *x, - Nd4jLong *xShapeBuffer, + void const* x, + Nd4jLong const* xShapeBuffer, void *z, - Nd4jLong *zShapeBuffer, + Nd4jLong const* zShapeBuffer, void *extraArguments) { @@ -66,12 +66,12 @@ static inline __device__ void randomDoubleGeneric( template static inline __device__ void 
randomTripleGeneric( Nd4jPointer state, - void *x, - Nd4jLong *xShapeBuffer, - void *y, - Nd4jLong *yShapeBuffer, + void const* x, + Nd4jLong const* xShapeBuffer, + void const* y, + Nd4jLong const* yShapeBuffer, void *z, - Nd4jLong *zShapeBuffer, + Nd4jLong const* zShapeBuffer, void *extraArguments) { @@ -89,20 +89,20 @@ static inline __device__ void randomTripleGeneric( #ifndef __CLION_IDE__ // here we generate kernels for target operations -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, double, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float16, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, bfloat16, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, double, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float16, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) 
+DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, bfloat16, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, double, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, bfloat16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, double, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, 
void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, bfloat16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, double, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, bfloat16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, 
zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, double, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, bfloat16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) #endif @@ -110,10 +110,10 @@ namespace functions { namespace random { template template - void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -180,9 +180,9 @@ namespace functions { template template - void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong 
*zShapeBuffer, void *vextraArguments) { + void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void const* vx, Nd4jLong const* xShapeBuffer, void* vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -238,7 +238,7 @@ namespace functions { template template - void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -283,7 +283,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -295,7 +295,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -307,7 +307,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, 
Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -319,7 +319,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t *stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t *stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -331,9 +331,9 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -345,9 +345,9 @@ namespace functions { template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void 
*vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -358,9 +358,9 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -371,9 +371,9 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -384,11 +384,10 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong 
const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -399,10 +398,10 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -413,10 +412,10 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -429,10 +428,10 @@ namespace functions { template <> - _CUDA_H void 
RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_bool.cu b/libnd4j/include/loops/cuda/reduce/reduce_bool.cu index 3aa2626a2..b70f0f38f 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_bool.cu +++ b/libnd4j/include/loops/cuda/reduce/reduce_bool.cu @@ -33,23 +33,24 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { functions::reduce::ReduceBoolFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const 
Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { functions::reduce::ReduceBoolFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } @@ -94,14 +95,14 @@ __device__ void ReduceBoolFunction::aggregatePartials(void *vsPartials, Nd4 //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceBoolFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceBoolFunction::transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -147,13 +148,13 @@ __device__ void ReduceBoolFunction::transformCudaXD( void *vx, Nd4jLong *xS //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceBoolFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceBoolFunction::execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + const Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -235,7 +236,13 @@ __device__ void 
ReduceBoolFunction::execScalarCuda(void *vx, Nd4jLong *xSha //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { nd4j_printf("Step A%i\n", -1); @@ -244,7 +251,7 @@ __host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStrea if(shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -265,14 +272,20 @@ __host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStrea //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceBoolFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceBoolFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong 
*hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -289,7 +302,14 @@ __host__ void ReduceBoolFunction::intermediateScalar(dim3 launchDims, cudaS //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceBoolFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceBoolFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), OPS_A(REDUCE_BOOL_OPS)); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarFloat(...) 
failed"); @@ -297,7 +317,14 @@ _CUDA_H void ReduceBoolFunction::execReduceScalar(dim3 launchDims, cudaStre //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceBoolFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceBoolFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const int rank, const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateXD, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(REDUCE_BOOL_OPS)); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_float.chpp b/libnd4j/include/loops/cuda/reduce/reduce_float.chpp index e1b95ae55..71f5d03da 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_float.chpp +++ b/libnd4j/include/loops/cuda/reduce/reduce_float.chpp @@ -35,23 +35,24 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong 
*tadOffsets) { functions::reduce::ReduceFloatFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { functions::reduce::ReduceFloatFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } @@ -95,14 +96,14 @@ __device__ void ReduceFloatFunction::aggregatePartials(void *vsPartials, Nd //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceFloatFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceFloatFunction::transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -146,13 +147,13 @@ __device__ void ReduceFloatFunction::transformCudaXD( void *vx, Nd4jLong *x //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceFloatFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void 
*vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { +__device__ void ReduceFloatFunction::execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + void *vreductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -234,14 +235,20 @@ __device__ void ReduceFloatFunction::execScalarCuda(void *vx, Nd4jLong *xSh //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceFloatFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceFloatFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { if(shape::isEmpty(hXShapeInfo)) { if(shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = std::is_same>::value ? sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = std::is_same>::value ? 
sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) throw sd::cuda_exception::build("ReduceFloatFunction::intermediateXD: failed to copy temporary scalar", res); @@ -259,27 +266,40 @@ __host__ void ReduceFloatFunction::intermediateXD(dim3 launchDims, cudaStre //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceFloatFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceFloatFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = std::is_same>::value ? sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = std::is_same>::value ? 
sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) throw sd::cuda_exception::build("ReduceFloatFunction::intermediateScalar: failed to copy resulting scalar", res); } else { - simpleScalar << < launchDims.x, launchDims.y, launchDims.z, *stream>>>(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo); + simpleScalar <<>>(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo); } } //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceFloatFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceFloatFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), OPS_A(REDUCE_FLOAT_OPS)); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarFloat(...) 
failed"); @@ -287,7 +307,14 @@ _CUDA_H void ReduceFloatFunction::execReduceScalar(dim3 launchDims, cudaStr //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceFloatFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShape, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceFloatFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const int rank, const void *x, const Nd4jLong *xShape, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateXD, PARAMS(launchDims, stream, x, xShape, hXShapeInfo, extraParams, z, zShape, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(REDUCE_FLOAT_OPS)); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_long.cu b/libnd4j/include/loops/cuda/reduce/reduce_long.cu index e55ecd11c..1beac5330 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_long.cu +++ b/libnd4j/include/loops/cuda/reduce/reduce_long.cu @@ -33,46 +33,48 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__device__ void reduceSimpleGeneric(void *x, Nd4jLong *xShapeInfo, +__device__ void reduceSimpleGeneric(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { 
functions::reduce::ReduceLongFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__device__ void reduceScalarGeneric(void *x, Nd4jLong *xShapeInfo, +__device__ void reduceScalarGeneric(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { functions::reduce::ReduceLongFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { reduceSimpleGeneric(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { reduceScalarGeneric(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, 
reductionBuffer, tadOnlyShapeInfo); } @@ -116,14 +118,14 @@ __device__ void ReduceLongFunction::aggregatePartials(void *vsPartials, Nd4 //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceLongFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceLongFunction::transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -167,13 +169,13 @@ __device__ void ReduceLongFunction::transformCudaXD( void *vx, Nd4jLong *xS //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceLongFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceLongFunction::execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + const Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -254,14 +256,20 @@ __device__ void ReduceLongFunction::execScalarCuda(void *vx, Nd4jLong *xSha //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceLongFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void 
*extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceLongFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { if(shape::isEmpty(hXShapeInfo)) { if(shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -280,14 +288,20 @@ __host__ void ReduceLongFunction::intermediateXD(dim3 launchDims, cudaStrea //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceLongFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceLongFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = 
static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -300,7 +314,14 @@ __host__ void ReduceLongFunction::intermediateScalar(dim3 launchDims, cudaS //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceLongFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceLongFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), OPS_A(REDUCE_LONG_OPS)); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarFloat(...) 
failed"); @@ -308,7 +329,14 @@ _CUDA_H void ReduceLongFunction::execReduceScalar(dim3 launchDims, cudaStre //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceLongFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceLongFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, + const int opNum, + int rank, const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateXD, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(REDUCE_LONG_OPS)); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_same.cu b/libnd4j/include/loops/cuda/reduce/reduce_same.cu index c3c74c806..c1947314e 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_same.cu +++ b/libnd4j/include/loops/cuda/reduce/reduce_same.cu @@ -34,23 +34,23 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { 
functions::reduce::ReduceSameFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { functions::reduce::ReduceSameFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } @@ -95,14 +95,14 @@ __device__ void ReduceSameFunction::aggregatePartials(void *vsPartials, Nd4jL //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceSameFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceSameFunction::transformCudaXD( void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -155,23 +155,23 @@ __device__ void ReduceSameFunction::transformCudaXD( void *vx, Nd4jLong *xSha //////////////////////////////////////////////////////////////////////// template -__device__ void ReduceSameFunction::execScalarCudaLegacy(int opNum, void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceSameFunction::execScalarCudaLegacy(int opNum, 
void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_T(execScalarCuda, PARAMS(vx, xShapeInfo, vextraParams, vz, zShapeInfo, vreductionBuffer, tadOnlyShapeInfo), REDUCE_SAME_OPS); } //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceSameFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceSameFunction::execScalarCuda(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void * vz, Nd4jLong const* zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + Nd4jLong const* tadOnlyShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -251,14 +251,14 @@ __device__ void ReduceSameFunction::execScalarCuda(void *vx, Nd4jLong *xShape //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceSameFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceSameFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { if(shape::isEmpty(hXShapeInfo)) { if(shape::isEmpty(hZShapeInfo)) 
return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(X), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -277,14 +277,14 @@ __host__ void ReduceSameFunction::intermediateXD(dim3 launchDims, cudaStream_ //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceSameFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceSameFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(X), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -297,7 +297,7 @@ __host__ void ReduceSameFunction::intermediateScalar(dim3 launchDims, cudaStr //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceSameFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, 
Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceSameFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_T(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), REDUCE_SAME_OPS); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarSame(...) failed"); @@ -305,7 +305,7 @@ _CUDA_H void ReduceSameFunction::execReduceScalar(dim3 launchDims, cudaStream //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceSameFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceSameFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { DISPATCH_BY_OPNUM_T(intermediateXD, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), REDUCE_SAME_OPS); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce3.chpp b/libnd4j/include/loops/cuda/reduce3.chpp index 2fa16e9ac..2a301b817 100644 --- 
a/libnd4j/include/loops/cuda/reduce3.chpp +++ b/libnd4j/include/loops/cuda/reduce3.chpp @@ -32,28 +32,28 @@ namespace reduce3 { //////////////////////////////////////////////////////////////////////// template __global__ void execScalarGeneric(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int* allocationPointer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { Reduce3::execScalarCuda(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, allocationPointer, reductionBuffer, tadOnlyShapeInfo); } template __global__ void execAllGeneric(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { Reduce3::execAllCuda(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); } @@ -62,15 +62,15 @@ __global__ void execAllGeneric(const int opNum, //////////////////////////////////////////////////////////////////////// template __global__ void execGeneric(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void 
*extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { Reduce3::execCuda(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); } @@ -111,14 +111,14 @@ __device__ void Reduce3::aggregatePartials(void* vsPartials, Nd4jLong tid, ////////////////////////////////////////////////////////////////////////// template template -__device__ void Reduce3::execScalarCuda( void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__device__ void Reduce3::execScalarCuda( void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, - int *allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void* vz, Nd4jLong const* zShapeInfo, + int *allocationPointer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); __shared__ Z extraZ[3]; @@ -235,18 +235,18 @@ __device__ void Reduce3::execScalarCuda( void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -__device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__device__ void Reduce3::transformAll( void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong 
const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo,Nd4jLong *yOffsets) { + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets) { - auto dx = reinterpret_cast(vx); - auto dy = reinterpret_cast(vy); + auto dx = reinterpret_cast(vx); + auto dy = reinterpret_cast(vy); auto z = reinterpret_cast(vz); // initialize partials first @@ -287,7 +287,7 @@ __device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, for (int r = blockIdx.x; r < xTads; r += blockDim.x * gridDim.x) { - X *x = dx + xOffsets[r]; + auto x = dx + xOffsets[r]; if (threadIdx.x < xTadLength && threadIdx.x < maxBlock) { auto x0 = shape::getIndexOffset(threadIdx.x, xTadShapeInfo); @@ -297,7 +297,7 @@ __device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, for (int g = 0; g < yTads; g++) { - X *y = dy + yOffsets[g]; + auto y = dy + yOffsets[g]; int ri = (r * yTads) + g; sPartials[threadIdx.x] = startingVal; @@ -339,15 +339,15 @@ __device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -__device__ void Reduce3::transform(void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__device__ void Reduce3::transform(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { // FIXME if(shape::isScalar(zShapeInfo)) @@ -357,8 +357,8 @@ __device__ void 
Reduce3::transform(void *vx, Nd4jLong *xShapeInfo, yTadOnlyShapeInfo = yShapeInfo; // execReduce3TAD case } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); Z startingVal = OpType::startingValue(x); @@ -454,15 +454,15 @@ __device__ void Reduce3::transform(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template __device__ void Reduce3::execCuda(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets), REDUCE3_OPS); } @@ -472,15 +472,15 @@ __device__ void Reduce3::execCuda(const int opNum, ////////////////////////////////////////////////////////////////////////// template __device__ void Reduce3::execAllCuda( const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong 
*yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { DISPATCH_BY_OPNUM_TT(transformAll, PARAMS(vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets), REDUCE3_OPS); } @@ -489,12 +489,12 @@ __device__ void Reduce3::execAllCuda( const int opNum, ////////////////////////////////////////////////////////////////////////// template __device__ void Reduce3::execScalarCuda(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int * allocationPointer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalarCuda, PARAMS(vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, allocationPointer, reductionBuffer, tadOnlyShapeInfo), REDUCE3_OPS); } @@ -504,15 +504,15 @@ __device__ void Reduce3::execScalarCuda(const int opNum, template __host__ void Reduce3::exec(dim3 launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { execGeneric<<>>(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, 
dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); sd::DebugHelper::checkErrorCode(stream, "reduce3exec(...) failed"); @@ -522,15 +522,15 @@ __host__ void Reduce3::exec(dim3 launchDims, cudaStream_t *stream, template __host__ void Reduce3::execAll(dim3 launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { execAllGeneric<<>>(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); sd::DebugHelper::checkErrorCode(stream, "execAllGeneric(...) failed"); @@ -540,13 +540,13 @@ __host__ void Reduce3::exec(dim3 launchDims, cudaStream_t *stream, template __host__ void Reduce3::execScalar(dim3 launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int* allocationPointer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { execScalarGeneric<<>>(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, allocationPointer, reductionBuffer, tadOnlyShapeInfo); sd::DebugHelper::checkErrorCode(stream, "execScalarGeneric(...) 
failed"); diff --git a/libnd4j/include/loops/cuda/scalar.chpp b/libnd4j/include/loops/cuda/scalar.chpp index ec1b42334..b412e4957 100644 --- a/libnd4j/include/loops/cuda/scalar.chpp +++ b/libnd4j/include/loops/cuda/scalar.chpp @@ -32,10 +32,10 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void scalarSimpleShaped(void* vx, void *vscalar, Nd4jLong *xShapeInfo, void *vparams, void *vz, Nd4jLong *zShapeInfo, int *allocationBuffer) { +__global__ static void scalarSimpleShaped(void const* vx, void const* vscalar, Nd4jLong const* xShapeInfo, void *vparams, void *vz, Nd4jLong const* zShapeInfo, int *allocationBuffer) { - auto scalar = reinterpret_cast(vscalar)[0]; - auto x = reinterpret_cast(vx); + auto scalar = reinterpret_cast(vscalar)[0]; + auto x = reinterpret_cast(vx); auto params = reinterpret_cast(vparams); auto z = reinterpret_cast(vz); @@ -69,18 +69,18 @@ __global__ static void scalarSimpleShaped(void* vx, void *vscalar, Nd4jLong *xSh //////////////////////////////////////////////////////////////////////////////// template -__global__ static void scalarAlongDimension(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, +__global__ static void scalarAlongDimension(void const* vx, Nd4jLong const* xShapeInfo, + void* vextraParams, + void* vz, Nd4jLong const* zShapeInfo, + void const* vscalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); if (tadShapeInfoZ == nullptr) { 
tadShapeInfoZ = tadShapeInfo; @@ -98,7 +98,7 @@ __global__ static void scalarAlongDimension(void *vx, Nd4jLong *xShapeInfo, // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -109,7 +109,7 @@ __global__ static void scalarAlongDimension(void *vx, Nd4jLong *xShapeInfo, // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -126,7 +126,7 @@ namespace scalar { //////////////////////////////////////////////////////////////////////////////// template template -void _CUDA_H ScalarTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, void* vscalar, void *vextraParams, int *allocPointer){ +void _CUDA_H ScalarTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, void const* vscalar, void *vextraParams, int *allocPointer){ auto xEws = shape::elementWiseStride(hxShapeInfo); auto xOrder = shape::order(hxShapeInfo); @@ -143,14 +143,14 @@ void _CUDA_H ScalarTransform::intermediateShaped(dim3& launchDims, cudaSt //////////////////////////////////////////////////////////////////////////////// template template -void _CUDA_H ScalarTransform::intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void _CUDA_H ScalarTransform::intermediateAlongDimension(dim3& launchDims, 
cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void *z, Nd4jLong const* zShapeInfo, void const* scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { scalarAlongDimension<<>>(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "scalarAlongDimA(...) failed"); } //////////////////////////////////////////////////////////////////////////////// template -void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, void* vscalar, void *vextraParams) { +void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, void const* vscalar, void *vextraParams) { if (sd::Environment::getInstance()->isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); @@ -160,11 +160,10 @@ void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *s //////////////////////////////////////////////////////////////////////////////// template -void ScalarTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void ScalarTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void const* vscalars, void *vextraParams, int *dimension, int 
dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TTT(intermediateAlongDimension, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalars, vextraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), SCALAR_OPS); } - } } diff --git a/libnd4j/include/loops/cuda/scalar_bool.cu b/libnd4j/include/loops/cuda/scalar_bool.cu index 1c8929ef3..e23560778 100644 --- a/libnd4j/include/loops/cuda/scalar_bool.cu +++ b/libnd4j/include/loops/cuda/scalar_bool.cu @@ -29,13 +29,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, +__global__ void scalarAlongDimension(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::scalar::ScalarBoolTransform::template transformCuda(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); } @@ -43,7 +43,7 @@ __global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template -__global__ void scalarSimpleShaped(void* x, void *y, Nd4jLong *xShapeInfo, void *params, void *z, Nd4jLong *zShapeInfo, int *allocationBuffer) { +__global__ void scalarSimpleShaped(void const* x, void const* y, Nd4jLong const* xShapeInfo, void *params, void *z, Nd4jLong const* zShapeInfo, int *allocationBuffer) { 
functions::scalar::ScalarBoolTransform::template transformCuda(y, x, xShapeInfo, params, z, zShapeInfo, allocationBuffer); } @@ -60,13 +60,13 @@ namespace scalar { //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarBoolTransform::transformCuda(void* vscalar, - void *vy, Nd4jLong *yShapeInfo, +__device__ void ScalarBoolTransform::transformCuda(void const* vscalar, + void const* vy, Nd4jLong const* yShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *allocationBuffer) { - auto scalar = reinterpret_cast(vscalar)[0]; - auto y = reinterpret_cast(vy); + auto scalar = reinterpret_cast(vscalar)[0]; + auto y = reinterpret_cast(vy); auto params = reinterpret_cast(vparams); auto z = reinterpret_cast(vz); @@ -101,14 +101,14 @@ __device__ void ScalarBoolTransform::transformCuda(void* vscalar, template template __device__ void ScalarBoolTransform::transformCuda(Nd4jLong len, - void* vx, - void *vy, Nd4jLong yEWS, + void const* vx, + void const* vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer) { - auto x = reinterpret_cast(vx)[0]; - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx)[0]; + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); @@ -130,15 +130,15 @@ __device__ void ScalarBoolTransform::transformCuda(Nd4jLong len, //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ScalarBoolTransform::transformCuda(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { - auto x = 
reinterpret_cast(vx); - auto scalars = reinterpret_cast(vscalars); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { + auto x = reinterpret_cast(vx); + auto scalars = reinterpret_cast(vscalars); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -158,7 +158,7 @@ __device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xS // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -169,7 +169,7 @@ __device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xS // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -184,13 +184,13 @@ __device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xS template template _CUDA_H void ScalarBoolTransform::intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, - void *x, Nd4jLong *xShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void const* x, Nd4jLong const* xShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { scalarAlongDimension<<>>(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "scalarAlongDim(...) 
failed"); @@ -200,9 +200,9 @@ _CUDA_H void ScalarBoolTransform::intermediateAlongDimension(dim3& launchD template template void _CUDA_H ScalarBoolTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, void *vextraParams, int *allocPointer){ scalarSimpleShaped<<>>(vx, vscalar, xShapeInfo, vextraParams, vz, zShapeInfo, allocPointer); @@ -213,20 +213,20 @@ void _CUDA_H ScalarBoolTransform::intermediateShaped(dim3& launchDims, cuda template void ScalarBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, - void *vextraParams) { + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, + void const* vextraParams) { if (sd::Environment::getInstance()->isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); - DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, vextraParams, nullptr), SCALAR_BOOL_OPS); + DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, const_cast(vextraParams), nullptr), SCALAR_BOOL_OPS); } //////////////////////////////////////////////////////////////////////// template -void ScalarBoolTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void ScalarBoolTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *vz, Nd4jLong const* 
zShapeInfo, void const* vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TT(intermediateAlongDimension, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalars, vextraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), SCALAR_BOOL_OPS); } diff --git a/libnd4j/include/loops/cuda/scalar_int.cu b/libnd4j/include/loops/cuda/scalar_int.cu index bb761c76c..2ca0ade26 100644 --- a/libnd4j/include/loops/cuda/scalar_int.cu +++ b/libnd4j/include/loops/cuda/scalar_int.cu @@ -29,13 +29,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, +__global__ void scalarAlongDimension(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::scalar::ScalarIntTransform::template transformCuda(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); } @@ -43,7 +43,7 @@ __global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template -__global__ void scalarSimpleShaped(void* x, void *y, Nd4jLong *xShapeInfo, void *params, void *z, Nd4jLong *zShapeInfo, int *allocationBuffer) { +__global__ void scalarSimpleShaped(void const* x, void const* y, Nd4jLong const* xShapeInfo, void *params, void *z, Nd4jLong const* zShapeInfo, 
int *allocationBuffer) { functions::scalar::ScalarIntTransform::template transformCuda(y, x, xShapeInfo, params, z, zShapeInfo, allocationBuffer); } @@ -60,13 +60,13 @@ namespace scalar { //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarIntTransform::transformCuda(void* vscalar, - void *vy, Nd4jLong *yShapeInfo, +__device__ void ScalarIntTransform::transformCuda(void const* vscalar, + void const* vy, Nd4jLong const* yShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *allocationBuffer) { - auto scalar = reinterpret_cast(vscalar)[0]; - auto y = reinterpret_cast(vy); + auto scalar = reinterpret_cast(vscalar)[0]; + auto y = reinterpret_cast(vy); auto params = reinterpret_cast(vparams); auto z = reinterpret_cast(vz); @@ -101,14 +101,14 @@ __device__ void ScalarIntTransform::transformCuda(void* vscalar, template template __device__ void ScalarIntTransform::transformCuda(Nd4jLong len, - void* vx, - void *vy, Nd4jLong yEWS, + void const* vx, + void const* vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer) { - auto x = reinterpret_cast(vx)[0]; - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx)[0]; + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); @@ -130,15 +130,15 @@ __device__ void ScalarIntTransform::transformCuda(Nd4jLong len, //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ScalarIntTransform::transformCuda(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong 
*tadOffsetsZ) { - auto x = reinterpret_cast(vx); - auto scalars = reinterpret_cast(vscalars); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { + auto x = reinterpret_cast(vx); + auto scalars = reinterpret_cast(vscalars); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -158,7 +158,7 @@ __device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShape // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { X *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -169,7 +169,7 @@ __device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShape // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { X *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -184,13 +184,13 @@ __device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShape template template _CUDA_H void ScalarIntTransform::intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, - void *x, Nd4jLong *xShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void const* x, Nd4jLong const* xShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { scalarAlongDimension<<>>(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); } @@ -199,9 +199,9 @@ _CUDA_H void ScalarIntTransform::intermediateAlongDimension(dim3& launchDims, template template void _CUDA_H ScalarIntTransform::intermediateShaped(dim3& 
launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, void *vextraParams, int *allocPointer){ scalarSimpleShaped<<>>(vx, vscalar, xShapeInfo, vextraParams, vz, zShapeInfo, allocPointer); @@ -211,10 +211,10 @@ void _CUDA_H ScalarIntTransform::intermediateShaped(dim3& launchDims, cudaStr template void ScalarIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, - void *vextraParams) { + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, + void* vextraParams) { if (sd::Environment::getInstance()->isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); @@ -224,7 +224,7 @@ void ScalarIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *st //////////////////////////////////////////////////////////////////////// template -void ScalarIntTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void ScalarIntTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void const* vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_T(intermediateAlongDimension, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalars, vextraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, 
tadOffsetsZ), SCALAR_INT_OPS); } diff --git a/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu b/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu index 13ad1d5b4..999a09942 100644 --- a/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu +++ b/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu @@ -23,7 +23,7 @@ ////////////////////////////////////////////////////////////////////////// template -__global__ void bitonicArbitraryStepKernelKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending) { +__global__ void bitonicArbitraryStepKernelKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending) { auto x = static_cast(vx); auto y = static_cast(vy); @@ -101,7 +101,7 @@ __global__ void bitonicArbitraryStepKernelKey(void *vx, Nd4jLong *xShapeInfo, vo ////////////////////////////////////////////////////////////////////////// template -__global__ void execBitonicArbitraryStepKernel(void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending) { +__global__ void execBitonicArbitraryStepKernel(void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending) { auto x = static_cast(vx); int tid = threadIdx.x + blockDim.x * blockIdx.x; @@ -177,14 +177,14 @@ __global__ void execBitonicArbitraryStepKernel(void *vx, Nd4jLong *xShapeInfo, i ////////////////////////////////////////////////////////////////////////// template -__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending) { +__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending) { execBitonicArbitraryStepKernel<<>>(vx, xShapeInfo, window, length, reverse, descending); } 
template -__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending) { +__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending) { bitonicArbitraryStepKernelKey<<>>(vx, xShapeInfo, vy, yShapeInfo, window, length, reverse, descending); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES); -BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu b/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu index 6bd1e8a33..679e44d1f 100644 --- a/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu +++ b/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu @@ -24,7 +24,7 @@ ////////////////////////////////////////////////////////////////////////// template -__global__ void bitonicSortStepKernelKey(void *vx, Nd4jLong 
*xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending) { +__global__ void bitonicSortStepKernelKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending) { auto x = static_cast(vx); auto y = static_cast(vy); @@ -79,7 +79,7 @@ __global__ void bitonicSortStepKernelKey(void *vx, Nd4jLong *xShapeInfo, void *v ////////////////////////////////////////////////////////////////////////// template -__global__ void bitonicSortStepKernel(void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending) { +__global__ void bitonicSortStepKernel(void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending) { auto x = static_cast(vx); @@ -125,16 +125,16 @@ __global__ void bitonicSortStepKernel(void *vx, Nd4jLong *xShapeInfo, int j, int ////////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending) { +__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending) { bitonicSortStepKernel<<>>(vx, xShapeInfo, j, k, length, descending); } ////////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending) { +__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending) { bitonicSortStepKernelKey<<>>(vx, xShapeInfo, vy, yShapeInfo, j, k, length, descending); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGeneric, (dim3 &launchDims, cudaStream_t 
*stream, void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES); -BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu b/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu index 813de162d..409f84cc6 100644 --- a/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu +++ b/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu @@ -26,13 +26,13 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __device__ void fillDimensionalIsMax(void *vdX, - void *vdZ, Nd4jLong *zShapeInfo, - Nd4jLong *tadOnlyShapeInfo, + __device__ void fillDimensionalIsMax(const void *vdX, + void *vdZ, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOffsets) { + const Nd4jLong *tadOffsets) { - auto dX = reinterpret_cast(vdX); + auto dX = reinterpret_cast(vdX); auto dZ = reinterpret_cast(vdZ); __shared__ int tadLength; @@ -69,11 +69,11 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __global__ void execfillDimensionalIsMax(void *dX, - void *dZ, Nd4jLong *zShapeInfo, - Nd4jLong *tadOnlyShapeInfo, + __global__ void 
execfillDimensionalIsMax(const void *dX, + void *dZ, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOffsets) { + const Nd4jLong *tadOffsets) { fillDimensionalIsMax(dX, dZ, zShapeInfo, tadOnlyShapeInfo, dimension, dimensionLength, tadOffsets); } @@ -81,14 +81,14 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __host__ void fillDimensionalIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, - void *dX, - void *dZ, Nd4jLong *zShapeInfo, - Nd4jLong *tadOnlyShapeInfo, + const void *dX, + void *dZ, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOffsets) { + const Nd4jLong *tadOffsets) { execfillDimensionalIsMax<<>>(dX, dZ, zShapeInfo, tadOnlyShapeInfo, dimension, dimensionLength, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "fillDimensionalIsMax(...) failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillDimensionalIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, void *dX, void *dZ, Nd4jLong *zShapeInfo, Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillDimensionalIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, const void *dX, void *dZ, const Nd4jLong *zShapeInfo, const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadOffsets), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/fillIsMax.cu b/libnd4j/include/loops/cuda/specials/fillIsMax.cu index 1a994a13c..00997b022 100644 --- a/libnd4j/include/loops/cuda/specials/fillIsMax.cu +++ b/libnd4j/include/loops/cuda/specials/fillIsMax.cu @@ -25,7 +25,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __global__ void execFillIsMax(void *vdZ, Nd4jLong *xShapeInfo, Nd4jLong 
length, long idx) { + __global__ void execFillIsMax(void *vdZ, const Nd4jLong *xShapeInfo, Nd4jLong length, long idx) { auto dz = reinterpret_cast(vdZ); int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -35,11 +35,11 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __host__ void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong *xShapeInfo, Nd4jLong length, long idx) { + __host__ void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, const Nd4jLong *xShapeInfo, Nd4jLong length, long idx) { execFillIsMax<<>>(dx, xShapeInfo, length, idx); sd::DebugHelper::checkErrorCode(stream, "fillIsMax(...) failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, void* dz, Nd4jLong *zShapeInfo, Nd4jLong length, long idx), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, void* dz, const Nd4jLong *zShapeInfo, Nd4jLong length, long idx), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/oesTad.cu b/libnd4j/include/loops/cuda/specials/oesTad.cu index 9f41ffbb9..6f08e23ad 100644 --- a/libnd4j/include/loops/cuda/specials/oesTad.cu +++ b/libnd4j/include/loops/cuda/specials/oesTad.cu @@ -22,10 +22,10 @@ ////////////////////////////////////////////////////////////////////////// template -__global__ void execOesTadKernelKey(void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__global__ void execOesTadKernelKey(void *vx, Nd4jLong const* xShapeInfo, + void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { auto x = static_cast(vx); @@ -94,9 +94,9 @@ __global__ void execOesTadKernelKey(void *vx, Nd4jLong *xShapeInfo, 
////////////////////////////////////////////////////////////////////////// template -__global__ void execOesTadKernel(void *vx, Nd4jLong *xShapeInfo, +__global__ void execOesTadKernel(void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { auto x = static_cast(vx); @@ -182,9 +182,9 @@ __global__ void execOesTadKernel(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template __host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, + void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { execOesTadKernel<<>>(vx, xShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); @@ -192,14 +192,14 @@ __host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, template __host__ void oesTadGenericKey(dim3 &launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void *vx, Nd4jLong const* xShapeInfo, + void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { execOesTadKernelKey<<>>(vx, xShapeInfo, vy, yShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT oesTadGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending), LIBND4J_TYPES); -BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT oesTadGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void 
*vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT oesTadGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending), LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT oesTadGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu b/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu index 7ef6a46db..69d103e67 100644 --- a/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu +++ b/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu @@ -29,8 +29,8 @@ namespace sd { void *vz, Nd4jLong len, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets) { auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); @@ -67,8 +67,8 @@ namespace sd { void *vz, Nd4jLong len, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets) { pullRowsKernel(vx, vz, len, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets); } @@ -80,13 +80,13 @@ namespace sd { void *vz, Nd4jLong len, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + Nd4jLong const* 
tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets) { execPullRowsKernel<<>>(vx, vz, len, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets); sd::DebugHelper::checkErrorCode(stream, "pullRows(...) failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT pullRowsKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, void * vz, Nd4jLong len, Nd4jLong * indexes, Nd4jLong * tadShapeInfo, Nd4jLong * tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong * zTadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT pullRowsKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, void * vz, Nd4jLong len, Nd4jLong * indexes, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets), LIBND4J_TYPES); } diff --git a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu index 796ea85c0..334584fab 100644 --- a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu +++ b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu @@ -27,7 +27,7 @@ namespace sd { // input - theSecondBuffer/Shape from input NDArray // output - theFirstBuffer/Shape from input NDArray template - static __global__ void swapUnsafeKernel(void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape) { + static __global__ void swapUnsafeKernel(void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; @@ -51,12 +51,12 @@ namespace sd { } } - BUILD_SINGLE_TEMPLATE(template __global__ void swapUnsafeKernel, (void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template __global__ void swapUnsafeKernel, (void* theFirstBuffer, 
Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape), LIBND4J_TYPES); template - void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape, cudaStream_t* theStream) { + void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape, cudaStream_t* theStream) { swapUnsafeKernel<<<256, 512, 8192, *theStream>>>(theFirstBuffer, theFirstShape, theSecondBuffer, theSecondShape); } - BUILD_SINGLE_TEMPLATE(template void templatedSwapUnsafe, (void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape, cudaStream_t* theStream), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void templatedSwapUnsafe, (void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape, cudaStream_t* theStream), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/tearKernel.cu b/libnd4j/include/loops/cuda/specials/tearKernel.cu index a6285b5a5..e1d70e6b5 100644 --- a/libnd4j/include/loops/cuda/specials/tearKernel.cu +++ b/libnd4j/include/loops/cuda/specials/tearKernel.cu @@ -26,8 +26,8 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __device__ void - tearKernel(void *vx, Nd4jLong *xShapeInfo, Nd4jPointer *targets, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + tearKernel(void *vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { @@ -39,8 +39,8 @@ namespace sd { // __shared__ int zRank; // __shared__ Nd4jLong *tadShape; // __shared__ Nd4jLong *tadStride; -// __shared__ Nd4jLong *zShape; -// __shared__ Nd4jLong *zStride; +// __shared__ Nd4jLong const* zShape; +// __shared__ Nd4jLong const* zStride; __shared__ T* x; if (threadIdx.x == 0) { tadLength = 
shape::length(tadShapeInfo); @@ -74,8 +74,8 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __global__ void - execTearKernel(void *vx, Nd4jLong *xShapeInfo, Nd4jPointer *targets, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + execTearKernel(void *vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { tearKernel(vx, xShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets); } @@ -83,13 +83,13 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __host__ void tearKernelGeneric(dim3 &launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - Nd4jPointer *targets, Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + void *vx, Nd4jLong const* xShapeInfo, + Nd4jPointer *targets, Nd4jLong const* zShapeInfo, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { execTearKernel<<>>(vx, xShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "tear(...) 
failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT tearKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, Nd4jLong * xShapeInfo, Nd4jPointer *targets, Nd4jLong * zShapeInfo, Nd4jLong * tadShapeInfo, Nd4jLong * tadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT tearKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/tileKernel.cu b/libnd4j/include/loops/cuda/specials/tileKernel.cu index d6076d6cb..3a2684579 100644 --- a/libnd4j/include/loops/cuda/specials/tileKernel.cu +++ b/libnd4j/include/loops/cuda/specials/tileKernel.cu @@ -21,11 +21,11 @@ #include namespace sd { - static Nd4jLong __device__ __noinline__ getIndexOffset_(Nd4jLong index, Nd4jLong *shapeInfo) { + static Nd4jLong __device__ __noinline__ getIndexOffset_(Nd4jLong index, Nd4jLong const* shapeInfo) { return shape::getIndexOffset(index, shapeInfo); } - static Nd4jLong __device__ __noinline__ subArrayOffset(Nd4jLong index, Nd4jLong *shapeInfoA, Nd4jLong *shapeInfoB) { + static Nd4jLong __device__ __noinline__ subArrayOffset(Nd4jLong index, Nd4jLong const* shapeInfoA, Nd4jLong const* shapeInfoB) { return shape::subArrayOffset(index, shapeInfoA, shapeInfoB); } @@ -37,7 +37,7 @@ namespace sd { // resultLength - length for output array template static __global__ void - tileKernel(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, + tileKernel(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Original code to transform in cuda-based @@ -58,22 +58,22 @@ namespace sd { } - 
BUILD_SINGLE_TEMPLATE(template __global__ void tileKernel,(void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template __global__ void tileKernel,(void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - void tileKernelH(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, Nd4jLong resultLength, cudaStream_t *stream) { + void tileKernelH(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, cudaStream_t *stream) { dim3 launchDims(256, 512, 8192); tileKernel << < launchDims.x, launchDims.y, launchDims.z, *stream>>>(inputBuffer, inputShape, outputBuffer, outputShape, resultLength); } - BUILD_SINGLE_TEMPLATE(template void tileKernelH, (void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, cudaStream_t *stream), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void tileKernelH, (void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, cudaStream_t *stream), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // enhancement for tileKernel to different input and output data types: X - output type, Y - input type template static __global__ void - tileKernelDouble(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, Nd4jLong resultLength, Nd4jLong ews) { + tileKernelDouble(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews) 
{ char ordering = shape::order(outputShape); auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; @@ -99,13 +99,13 @@ namespace sd { } } - BUILD_SINGLE_TEMPLATE_TWICE(template __global__ void tileKernelDouble, (void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, Nd4jLong ews), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE_TWICE(template __global__ void tileKernelDouble, (void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews), LIBND4J_TYPES); template - void tileKernelHH(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream) { + void tileKernelHH(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream) { dim3 launchDims(256, 512, 8192); tileKernelDouble<<>>(inputBuffer, inputShape, outputBuffer, outputShape, resultLength, ews); } - BUILD_SINGLE_TEMPLATE_TWICE(template void tileKernelHH, (void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream),LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE_TWICE(template void tileKernelHH, (void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream),LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/summarystatsreduce.cu b/libnd4j/include/loops/cuda/summarystatsreduce.cu index c858d8098..3d94b9097 100644 --- a/libnd4j/include/loops/cuda/summarystatsreduce.cu +++ b/libnd4j/include/loops/cuda/summarystatsreduce.cu @@ -39,7 +39,7 @@ namespace functions { namespace summarystats { template -void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong 
*xShapeInfo, int xRank, void *extraParams, void *z, Nd4jLong *zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot,bool biasCorrected,int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { +void _CUDA_G summaryStatsReduceT(int op, void const* dx, Nd4jLong const* xShapeInfo, int xRank, void *extraParams, void *z, Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot,bool biasCorrected,int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { functions::summarystats::SummaryStatsReduce::transform(op,dx,xShapeInfo,extraParams,z,zShapeInfo,dimension,dimensionLength,biasCorrected,allocationBuffer,reductionBuffer,tadOnlyShapeInfo,tadOffsets); } @@ -103,15 +103,15 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa */ template template - _CUDA_D void SummaryStatsReduce::transform(void *vx, Nd4jLong *xShapeInfo, + _CUDA_D void SummaryStatsReduce::transform(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { - auto dx = static_cast(vx); + auto dx = static_cast(vx); auto z = static_cast(vz); auto extraParams = static_cast(vextraParams); auto reductionBuffer = static_cast(vreductionBuffer); @@ -331,15 +331,15 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa template - _CUDA_D void SummaryStatsReduce::transform(const int opNum, void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, 
Nd4jLong *tadOffsets) { + _CUDA_D void SummaryStatsReduce::transform(const int opNum, void const* dx, Nd4jLong const* xShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(dx, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationBuffer, reductionBuffer, tadOnlyShapeInfo, tadOffsets), SUMMARY_STATS_OPS); }; template - _CUDA_H void SummaryStatsReduce::execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer) { + _CUDA_H void SummaryStatsReduce::execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vextraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto extraParams = static_cast(vextraParams); auto z = reinterpret_cast(vz); auto reductionPointerA = reinterpret_cast(reductionBuffer); @@ -363,9 +363,9 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa } template - _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer) { + _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& 
launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vextraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto extraParams = static_cast(vextraParams); @@ -390,9 +390,9 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa template - _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer) { + _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vextraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto extraParams = static_cast(vextraParams); diff --git a/libnd4j/include/loops/cuda/transform/transform_any.cu b/libnd4j/include/loops/cuda/transform/transform_any.cu index d13b94599..8b00b28fe 100644 --- a/libnd4j/include/loops/cuda/transform/transform_any.cu +++ b/libnd4j/include/loops/cuda/transform/transform_any.cu @@ -30,12 +30,12 @@ using namespace simdOps; template -__global__ void transformAnySimple(void *x, Nd4jLong *xShapeInfo, int xRank, - void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, 
Nd4jLong *tadOffsets) { +__global__ void transformAnySimple( + const void *x, const Nd4jLong *xShapeInfo, int xRank, + void *params, + void *z, const Nd4jLong *zShapeInfo, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformAny::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer,tadShapeInfo, tadOffsets); } @@ -45,7 +45,14 @@ namespace functions { namespace transform { template - _CUDA_H void TransformAny::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformAny::executeTransformShaped( + dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_ANY_OPS); DEBUG_KERNEL(stream, opNum); @@ -54,13 +61,14 @@ namespace functions { template template - __device__ void TransformAny::transformCuda(void *vx, Nd4jLong *xShapeInfo, - void *vparams, - void *vz, Nd4jLong *zShapeInfo, - int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + __device__ void TransformAny::transformCuda( + const void *vx, const Nd4jLong *xShapeInfo, + void *vparams, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *vreductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = 
reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); auto reductionPointer = reinterpret_cast(vreductionPointer); @@ -109,9 +117,17 @@ namespace functions { template template - _CUDA_H void TransformAny::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformAny::intermediateShaped( + dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { + transformAnySimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); - sd::DebugHelper::checkErrorCode(stream, "transformAny(...) failed"); + + sd::DebugHelper::checkErrorCode(stream, "transformAny(...) 
failed"); } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT TransformAny, , LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/transform/transform_bool.cu b/libnd4j/include/loops/cuda/transform/transform_bool.cu index fec14a745..f9526d296 100644 --- a/libnd4j/include/loops/cuda/transform/transform_bool.cu +++ b/libnd4j/include/loops/cuda/transform/transform_bool.cu @@ -30,12 +30,12 @@ using namespace simdOps; template -__global__ void transformBoolSimple(void *x, Nd4jLong *xShapeInfo, int xRank, - void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__global__ void transformBoolSimple( + const void *x, const Nd4jLong *xShapeInfo, int xRank, + void *params, + void *z, const Nd4jLong *zShapeInfo, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformBool::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer,tadShapeInfo, tadOffsets); } @@ -45,7 +45,15 @@ namespace functions { namespace transform { template - _CUDA_H void TransformBool::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformBool::executeTransformShaped( + dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { + DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, 
tadOffsets), TRANSFORM_BOOL_OPS); DEBUG_KERNEL(stream, opNum); @@ -54,13 +62,14 @@ namespace functions { template template - __device__ void TransformBool::transformCuda(void *vx, Nd4jLong *xShapeInfo, - void *vparams, - void *vz, Nd4jLong *zShapeInfo, - int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + __device__ void TransformBool::transformCuda( + const void *vx, const Nd4jLong *xShapeInfo, + void *vparams, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *vreductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto params = static_cast(vparams); auto reductionPointer = static_cast(vreductionPointer); @@ -115,7 +124,13 @@ namespace functions { template template - _CUDA_H void TransformBool::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformBool::intermediateShaped( + dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { transformBoolSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "transformBool(...) 
failed"); } diff --git a/libnd4j/include/loops/cuda/transform/transform_float.cu b/libnd4j/include/loops/cuda/transform/transform_float.cu index f631fd4d7..6b6889009 100644 --- a/libnd4j/include/loops/cuda/transform/transform_float.cu +++ b/libnd4j/include/loops/cuda/transform/transform_float.cu @@ -29,12 +29,12 @@ using namespace simdOps; template -__global__ void transformFloatSimple(void *x, Nd4jLong *xShapeInfo, int xRank, +__global__ void transformFloatSimple(const void *x, const Nd4jLong *xShapeInfo, int xRank, void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, + void *z, const Nd4jLong *zShapeInfo, int zRank, int *allocationPointer, void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformFloat::template transformCuda( x, xShapeInfo, @@ -49,7 +49,7 @@ namespace functions { namespace transform { template - _CUDA_H void TransformFloat::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformFloat::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, const void *x, const Nd4jLong *xShape, int xRank, void *extraParams, void *z, const Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_FLOAT_OPS); DEBUG_KERNEL(stream, opNum); @@ -58,16 +58,13 @@ namespace functions { template template - __device__ void TransformFloat::transformCuda( - void *vx, - Nd4jLong *xShapeInfo, - void *vparams, - void *vz, - Nd4jLong *zShapeInfo, - int 
*allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + __device__ void TransformFloat::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vparams, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *vreductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); auto reductionPointer = reinterpret_cast(vreductionPointer); @@ -122,24 +119,27 @@ namespace functions { template __device__ void TransformFloat::transformCudaLegacy( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *params, - void *z, - Nd4jLong *zShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *params, + void *z, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(transformCuda, PARAMS(x, xShapeInfo, params, z, zShapeInfo, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_FLOAT_OPS); } template template - _CUDA_H void TransformFloat::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformFloat::intermediateShaped( + dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { transformFloatSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, 
tadShapeInfo, tadOffsets); - sd::DebugHelper::checkErrorCode(stream, "transformFloat(...) failed"); + + sd::DebugHelper::checkErrorCode(stream, "transformFloat(...) failed"); } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT TransformFloat, , LIBND4J_TYPES, FLOAT_TYPES); diff --git a/libnd4j/include/loops/cuda/transform/transform_same.cu b/libnd4j/include/loops/cuda/transform/transform_same.cu index 368a9b602..b03146da9 100644 --- a/libnd4j/include/loops/cuda/transform/transform_same.cu +++ b/libnd4j/include/loops/cuda/transform/transform_same.cu @@ -29,12 +29,12 @@ using namespace simdOps; template -__global__ void transformSameSimple(void *x, Nd4jLong *xShapeInfo, int xRank, +__global__ void transformSameSimple(const void *x, const Nd4jLong *xShapeInfo, int xRank, void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, + void *z, const Nd4jLong *zShapeInfo, int zRank, int *allocationPointer, void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformSame::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer, tadShapeInfo, tadOffsets); } @@ -44,7 +44,13 @@ namespace functions { namespace transform { template - _CUDA_H void TransformSame::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformSame::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, 
extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_SAME_OPS); DEBUG_KERNEL(stream, opNum); @@ -53,13 +59,13 @@ namespace functions { template template - __device__ void TransformSame::transformCuda(void *vx, Nd4jLong *xShapeInfo, + __device__ void TransformSame::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto params = static_cast(vparams); auto reductionPointer = static_cast(vreductionPointer); @@ -113,7 +119,7 @@ namespace functions { template template - _CUDA_H void TransformSame::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformSame::intermediateShaped(dim3 launchDims, cudaStream_t *stream, const void *x, const Nd4jLong *xShape, int xRank, void *extraParams, void *z, const Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { transformSameSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "transformSame(...) 
failed"); } diff --git a/libnd4j/include/loops/cuda/transform/transform_strict.cu b/libnd4j/include/loops/cuda/transform/transform_strict.cu index 155e5aa23..f36b50c29 100644 --- a/libnd4j/include/loops/cuda/transform/transform_strict.cu +++ b/libnd4j/include/loops/cuda/transform/transform_strict.cu @@ -29,12 +29,12 @@ using namespace simdOps; template -__global__ void transformStrictSimple(void *x, Nd4jLong *xShapeInfo, int xRank, - void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__global__ void transformStrictSimple(const void *x, const Nd4jLong *xShapeInfo, int xRank, + void *params, + void *z, const Nd4jLong *zShapeInfo, int zRank, + int *allocationPointer, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformStrict::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer,tadShapeInfo, tadOffsets); } @@ -44,7 +44,13 @@ namespace functions { namespace transform { template - _CUDA_H void TransformStrict::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformStrict::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_STRICT_OPS); DEBUG_KERNEL(stream, opNum); @@ -53,13 +59,13 @@ 
namespace functions { template template - __device__ void TransformStrict::transformCuda(void *vx, Nd4jLong *xShapeInfo, + __device__ void TransformStrict::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto params = static_cast(vparams); auto reductionPointer = static_cast(vreductionPointer); @@ -114,7 +120,13 @@ namespace functions { template template - _CUDA_H void TransformStrict::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformStrict::intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { + transformStrictSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "transformStrict(...) failed"); } diff --git a/libnd4j/include/loops/cuda/type_conversions.cu b/libnd4j/include/loops/cuda/type_conversions.cu index 0f753bc56..3ad8e2089 100644 --- a/libnd4j/include/loops/cuda/type_conversions.cu +++ b/libnd4j/include/loops/cuda/type_conversions.cu @@ -221,8 +221,8 @@ namespace sd { * PLEASE NOTE: This kernel doesn't allow loop for data. Basically: grid will be huge. 
*/ template -__global__ static void execEncoderKernelP1(void *dx, Nd4jLong N, void *dz, float threshold) { - auto x = reinterpret_cast (dx); +__global__ static void execEncoderKernelP1(const void *dx, Nd4jLong N, void *dz, float threshold) { + auto x = reinterpret_cast (dx); auto z = reinterpret_cast (dz); //basically, for phase One we want do calculation: how many eligible values we have, and which blocks will be holding data @@ -242,12 +242,12 @@ __global__ static void execEncoderKernelP1(void *dx, Nd4jLong N, void *dz, float ////////////////////////////////////////////////////////////////////////// template -__host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz, float threshold) { +__host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz, float threshold) { execEncoderKernelP1<<>>(dx, N, dz, threshold); sd::DebugHelper::checkErrorCode(stream, "encoderP1(...) failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP1Generic, (dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz, float threshold), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP1Generic, (dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz, float threshold), FLOAT_TYPES); ////////////////////////////////////////////////////////////////////////// /* @@ -332,8 +332,8 @@ BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP3Generic, (dim3 &l * PLEASE NOTE: Z is expected to be memset to 0 */ template -__global__ static void execDecoderKernel(void *dx, Nd4jLong N, void *dz) { - auto x = reinterpret_cast (dx); +__global__ static void execDecoderKernel(const void *dx, Nd4jLong N, void *dz) { + auto x = reinterpret_cast (dx); auto z = reinterpret_cast (dz); int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -359,12 +359,12 @@ __global__ static void execDecoderKernel(void *dx, Nd4jLong N, void *dz) 
{ ////////////////////////////////////////////////////////////////////////// template -__host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz) { +__host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz) { execDecoderKernel<<>>(dx, N, dz); sd::DebugHelper::checkErrorCode(stream, "execDecoder(...) failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT decoderKernelGeneric, (dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT decoderKernelGeneric, (dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz), FLOAT_TYPES); ////////////////////////////////////////////////////////////////////////// @@ -450,18 +450,18 @@ BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaEncodeBitmapGeneric, (dim3 & ////////////////////////////////////////////////////////////////////////// template -__global__ static void execCudaDecodeBitmapKernel(void *dx, Nd4jLong N, void *vdz) { +__global__ static void execCudaDecodeBitmapKernel(const void *dx, Nd4jLong N, void *vdz) { auto dz = static_cast(vdz); int tid = blockIdx.x * blockDim.x + threadIdx.x; __shared__ T *shmem; __shared__ FloatBits fb; __shared__ float threshold; - __shared__ int *x; + __shared__ const int *x; if (threadIdx.x == 0){ extern __shared__ char mem[]; shmem = reinterpret_cast(mem); - x = reinterpret_cast(dx); + x = reinterpret_cast(dx); fb.i_ = x[2]; threshold = fb.f_; } @@ -505,12 +505,12 @@ __global__ static void execCudaDecodeBitmapKernel(void *dx, Nd4jLong N, void *vd ////////////////////////////////////////////////////////////////////////// template -__host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *vdz) { +__host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *vdz) { 
execCudaDecodeBitmapKernel<<>>(dx, N, vdz); sd::DebugHelper::checkErrorCode(stream, "cudeDecodeBitmap(...) failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaDecodeBitmapGeneric, (dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *vdz), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaDecodeBitmapGeneric, (dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *vdz), FLOAT_TYPES); template diff --git a/libnd4j/include/loops/impl/type_conversions.cpp b/libnd4j/include/loops/impl/type_conversions.cpp index 8d6729a94..a2f302d25 100644 --- a/libnd4j/include/loops/impl/type_conversions.cpp +++ b/libnd4j/include/loops/impl/type_conversions.cpp @@ -171,10 +171,10 @@ PRAGMA_OMP_ATOMIC_ARGS(write) } template - void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz) { + void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz) { FloatBits fb; auto z = reinterpret_cast(dz); - auto x = reinterpret_cast(dx); + auto x = reinterpret_cast(dx); int limit = x[0]; fb.i_ = x[2]; float threshold = fb.f_; @@ -215,10 +215,10 @@ PRAGMA_OMP_ATOMIC_ARGS(write) samediff::Threads::parallel_for(func, 0, N); }; - template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void 
TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); diff --git a/libnd4j/include/loops/indexreduce.h b/libnd4j/include/loops/indexreduce.h index 677d83db9..2e8bc33d2 100755 --- a/libnd4j/include/loops/indexreduce.h +++ b/libnd4j/include/loops/indexreduce.h @@ -51,32 +51,74 @@ namespace functions { template class IndexReduce { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ - static __device__ void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int *dimension,int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static __device__ void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension,int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset); template - static __device__ void aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements,void *extraParams); + static __device__ void aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements, void *extraParams); template - static __device__ void transform(void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transform(const void *dx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, 
+ int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, + int op, + const void *dx, const Nd4jLong *xShapeInfo, + int xRank, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int zRank, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, + int op, + const void *dx, const Nd4jLong *xShapeInfo, + int xRank, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int zRank, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); #else - static Nd4jLong execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams); + static Nd4jLong execScalar(int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams); - static void exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong 
*resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static void exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset); template - static _CUDA_H Nd4jLong execScalar(void *x, Nd4jLong *xShapeInfo, void *extraParams); + static _CUDA_H Nd4jLong execScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void exec(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static _CUDA_H void exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset); #endif }; } diff --git a/libnd4j/include/loops/pairwise_bool.h b/libnd4j/include/loops/pairwise_bool.h index fee96df84..9cc8f220c 100644 --- a/libnd4j/include/loops/pairwise_bool.h +++ b/libnd4j/include/loops/pairwise_bool.h @@ -58,62 +58,52 @@ namespace functions { #ifdef __CUDACC__ template - static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams); + static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams); - static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams); + static __host__ void executeCudaShaped(dim3& 
launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams); #else - static void exec( - const int opNum, - void *dx, - Nd4jLong *xShapeBuffer, - void *y, - Nd4jLong *yShapeBuffer, - void *result, - Nd4jLong *resultShapeBuffer, - void *extraParams, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *result, const Nd4jLong *resultShapeBuffer, + void *extraParams, + uint64_t start, uint64_t stop); - static void exec( - const int opNum, - void *dx, - Nd4jLong xStride, - void *y, - Nd4jLong yStride, - void *result, - Nd4jLong resultStride, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, Nd4jLong xStride, + const void *y, Nd4jLong yStride, + void *result, Nd4jLong resultStride, + void *extraParams, + Nd4jLong n, + uint64_t start, uint64_t stop); template - static void exec( - void *vx, - Nd4jLong* xShapeBuffer, - void *vy, - Nd4jLong* yShapeBuffer, - void *vresult, - Nd4jLong* resultShapeBuffer, - void *vextraParams, - const uint64_t start, - const uint64_t stop); + static void exec(const void *vx, const Nd4jLong* xShapeBuffer, + const void *vy, const Nd4jLong* yShapeBuffer, + void *vresult, const Nd4jLong* resultShapeBuffer, + void *vextraParams, + uint64_t start, uint64_t stop); template - static void exec(void *vx, - Nd4jLong xStride, - void *vy, - Nd4jLong yStride, - void *vresult, - Nd4jLong resultStride, + static void exec(const void *vx, Nd4jLong xStride, + const void *vy, Nd4jLong yStride, + void *vresult, Nd4jLong resultStride, void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop); + Nd4jLong n, + uint64_t start, uint64_t stop); #endif }; } diff --git 
a/libnd4j/include/loops/pairwise_int.h b/libnd4j/include/loops/pairwise_int.h index 4144963c7..64deebc04 100644 --- a/libnd4j/include/loops/pairwise_int.h +++ b/libnd4j/include/loops/pairwise_int.h @@ -59,62 +59,52 @@ namespace functions { #ifdef __CUDACC__ template - static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams); + static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams); - static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams); + static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams); #else - static void exec( - const int opNum, - void *dx, - Nd4jLong *xShapeBuffer, - void *y, - Nd4jLong *yShapeBuffer, - void *result, - Nd4jLong *resultShapeBuffer, - void *extraParams, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *result, const Nd4jLong *resultShapeBuffer, + void *extraParams, + uint64_t start, uint64_t stop); - static void exec( - const int opNum, - void *dx, - Nd4jLong xStride, - void *y, - Nd4jLong yStride, - void *result, - Nd4jLong resultStride, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, Nd4jLong xStride, + const void *y, Nd4jLong yStride, + void *result, Nd4jLong resultStride, + void 
*extraParams, + Nd4jLong n, + uint64_t start, uint64_t stop); template - static void exec( - void *vx, - Nd4jLong* xShapeBuffer, - void *vy, - Nd4jLong* yShapeBuffer, - void *vresult, - Nd4jLong* resultShapeBuffer, - void *vextraParams, - const uint64_t start, - const uint64_t stop); + static void exec(const void *vx, const Nd4jLong* xShapeBuffer, + const void *vy, const Nd4jLong* yShapeBuffer, + void *vresult, const Nd4jLong* resultShapeBuffer, + void *vextraParams, + uint64_t start,uint64_t stop); template - static void exec(void *vx, - Nd4jLong xStride, - void *vy, - Nd4jLong yStride, - void *vresult, - Nd4jLong resultStride, + static void exec(const void *vx, Nd4jLong xStride, + const void *vy, Nd4jLong yStride, + void *vresult, Nd4jLong resultStride, void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop); + Nd4jLong n, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/pairwise_transform.h b/libnd4j/include/loops/pairwise_transform.h index 8576481f5..b3b514df6 100755 --- a/libnd4j/include/loops/pairwise_transform.h +++ b/libnd4j/include/loops/pairwise_transform.h @@ -52,65 +52,55 @@ namespace functions { class PairWiseTransform { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ template - static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams); + static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams); - static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams); + static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int 
opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams); #endif public: - static void exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t start, - uint64_t stop); + static void exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t start, uint64_t stop); - static void exec( - const int opNum, - void *x, - Nd4jLong xStride, - void *y, - Nd4jLong yStride, - void *z, - Nd4jLong resultStride, - void *extraParams, - Nd4jLong len, - uint64_t start, - uint64_t stop); + static void exec(int opNum, + const void *x, Nd4jLong xStride, + const void *y, Nd4jLong yStride, + void *z, Nd4jLong resultStride, + void *extraParams, + Nd4jLong len, + uint64_t start, uint64_t stop); template - static void exec( - void *vx, - Nd4jLong* xShapeInfo, - void *vy, - Nd4jLong* yShapeInfo, - void *vresult, - Nd4jLong* zShapeInfo, - void *vextraParams, - uint64_t start, - uint64_t stop); + static void exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vresult, const Nd4jLong* zShapeInfo, + void *vextraParams, + uint64_t start, uint64_t stop); template - static void exec(void *vx, - Nd4jLong xStride, - void *vy, - Nd4jLong yStride, - void *vresult, - Nd4jLong resultStride, + static void exec(const void *vx, Nd4jLong xStride, + const void *vy, Nd4jLong yStride, + void *vresult, Nd4jLong resultStride, void *vextraParams, Nd4jLong len, - uint64_t start, - uint64_t stop); + uint64_t start, uint64_t stop); }; } } diff --git a/libnd4j/include/loops/random.h b/libnd4j/include/loops/random.h index 5048e5ce0..9b35f472f 100644 --- a/libnd4j/include/loops/random.h +++ b/libnd4j/include/loops/random.h @@ -38,34 +38,60 @@ namespace 
functions { class RandomFunction { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ template - static _CUDA_D void execTransformCuda(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_D void execTransformCuda(Nd4jPointer state, + const void *x, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); template - static _CUDA_D void execTransformCuda(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_D void execTransformCuda(Nd4jPointer state, + const void *x, const Nd4jLong *xShapeBuffer, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); template - static _CUDA_D void execTransformCuda(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_D void execTransformCuda(Nd4jPointer state, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); - static _CUDA_H void executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static _CUDA_H void executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static _CUDA_H void executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_H void executeCudaSingle(dim3& launchDims, cudaStream_t* stream, + int opNum, + Nd4jPointer stateHost, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); + + + static _CUDA_H void executeCudaDouble(dim3& launchDims, cudaStream_t* stream, + int opNum, + Nd4jPointer stateHost, + const void *x, 
const Nd4jLong *xShapeBuffer, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); + + + static _CUDA_H void executeCudaTriple(dim3& launchDims, cudaStream_t* stream, + int opNum, + Nd4jPointer stateHost, + const void *x, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *z, const Nd4jLong* zShapeBuffer, + void *extraArguments); #else template - static void execTransform(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, const void *y, const Nd4jLong *yShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); template - static void execTransform(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); template - static void execTransform(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(Nd4jPointer state, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); - static void execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static void execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static void execTransform(int opNum, Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, const 
void *y, const Nd4jLong *yShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(int opNum, Nd4jPointer state, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); #endif }; } diff --git a/libnd4j/include/loops/reduce3.h b/libnd4j/include/loops/reduce3.h index 597e450b1..f2496f1fe 100755 --- a/libnd4j/include/loops/reduce3.h +++ b/libnd4j/include/loops/reduce3.h @@ -75,10 +75,23 @@ class Reduce3 { static __device__ void aggregatePartials(void* sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda(void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, int *allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformAll(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + static __device__ void transformAll(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets); /** Perform a reduction @@ -90,54 +103,157 @@ class Reduce3 { @param result where to store the result of the reduction */ template - static __device__ void transform(void *vx, Nd4jLong 
*xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + static __device__ void transform(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); - static __device__ void execCuda(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + static __device__ void execCuda(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); - static __device__ void execAllCuda( const int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + static __device__ void execAllCuda(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const 
Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); - static __device__ void execScalarCuda(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int * allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int * allocationPointer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo); - static __host__ void exec(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); - - static __host__ void execAll(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); - - static __host__ void execScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int* allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void exec(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + 
void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); + static __host__ void execAll(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); + static __host__ void execScalar(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int* allocationPointer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo); #else template - static void execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo); + static void execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo); - static void execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParamsVals, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo); + static void execScalar(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo); template - static void exec(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int64_t start, int64_t stop); + static void exec(const void *vx, const 
Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop); template - static void exec(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop); + static void exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop); template - static void execAll(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop); + static void execAll(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop); - static void exec(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParamsVals, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int64_t start, int64_t stop); + static void exec(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop); - static void exec(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParamsVals, 
void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop); + static void exec(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop); - static void execAll(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParamsVals, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop); + static void execAll(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop); #endif }; diff --git a/libnd4j/include/loops/reduce_bool.h b/libnd4j/include/loops/reduce_bool.h index 815557d41..a74d53033 100644 --- a/libnd4j/include/loops/reduce_bool.h +++ b/libnd4j/include/loops/reduce_bool.h @@ -58,20 +58,20 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void 
*vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int 
*dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else /** @@ -83,40 +83,28 @@ namespace functions { * @return */ template - static _CUDA_H Z execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H Z execScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); - static Z execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static Z execScalar(int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams); static void execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + const void *x, const 
Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -132,15 +120,12 @@ namespace functions { template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -152,11 +137,9 @@ namespace functions { * @param resultShapeInfo the shape information */ template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfo); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo); @@ -169,10 +152,7 @@ namespace functions { * @return */ template - static Z _CUDA_H execScalar(void *x, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static Z _CUDA_H execScalar(const void *x, Nd4jLong xElementWiseStride, Nd4jLong length, void *extraParams); #endif }; diff --git a/libnd4j/include/loops/reduce_float.h b/libnd4j/include/loops/reduce_float.h index 6ff3f88ab..c78082f8e 100644 --- 
a/libnd4j/include/loops/reduce_float.h +++ b/libnd4j/include/loops/reduce_float.h @@ -60,20 +60,20 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, 
int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else /** @@ -85,40 +85,30 @@ namespace functions { * @return */ template - static _CUDA_H Z execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H Z execScalar(const void 
*vx, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo); static Z execScalar(int opNum, - void *vx, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - void *vx, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *zShapeInfo); + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *vx, - Nd4jLong *xShapeInfo, + const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -134,15 +124,12 @@ namespace functions { template - static void _CUDA_H exec(void *vx, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -154,11 +141,9 @@ namespace functions { * @param zShapeInfo the shape information */ template - static void _CUDA_H exec(void *vx, - Nd4jLong *xShapeInfo, + static void 
_CUDA_H exec(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, - void *vz, - Nd4jLong *zShapeInfo); + void *vz, const Nd4jLong *zShapeInfo); @@ -171,10 +156,7 @@ namespace functions { * @return */ template - static Z _CUDA_H execScalar(void *vx, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static Z _CUDA_H execScalar(const void *vx, Nd4jLong xElementWiseStride, Nd4jLong length, void *extraParams); #endif }; diff --git a/libnd4j/include/loops/reduce_long.h b/libnd4j/include/loops/reduce_long.h index 4c83e1057..45ede2985 100644 --- a/libnd4j/include/loops/reduce_long.h +++ b/libnd4j/include/loops/reduce_long.h @@ -57,20 +57,20 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong 
*tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, 
Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else @@ -83,40 +83,30 @@ namespace functions { * @return */ template - static _CUDA_H Z execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H Z execScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static Z execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -132,15 +122,12 @@ namespace functions { template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - 
int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -152,11 +139,9 @@ namespace functions { * @param resultShapeInfo the shape information */ template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfo); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo); @@ -169,10 +154,9 @@ namespace functions { * @return */ template - static Z _CUDA_H execScalar(void *x, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static Z _CUDA_H execScalar(const void *x, Nd4jLong xElementWiseStride, + Nd4jLong length, + void *extraParams); #endif }; diff --git a/libnd4j/include/loops/reduce_same.h b/libnd4j/include/loops/reduce_same.h index 641551b6f..5f3622f39 100644 --- a/libnd4j/include/loops/reduce_same.h +++ b/libnd4j/include/loops/reduce_same.h @@ -58,22 +58,22 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda( void const* vx, Nd4jLong const *xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); - static __device__ void execScalarCudaLegacy(int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, 
Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCudaLegacy(int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD( void const* vx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void 
*reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); #else /** @@ -85,40 +85,31 @@ namespace functions { * @return */ template - static _CUDA_H X execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H X execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); template - static _CUDA_H void execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static X execScalar(int opNum, - void 
*x, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -134,15 +125,12 @@ namespace functions { template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -154,11 +142,9 @@ namespace functions { * @param resultShapeInfo the shape information */ template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfo); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo); @@ -171,10 +157,9 @@ namespace functions { * @return */ template - static X _CUDA_H execScalar(void *x, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void 
*extraParams); + static X _CUDA_H execScalar(const void *x, Nd4jLong xElementWiseStride, + Nd4jLong length, + void *extraParams); #endif }; diff --git a/libnd4j/include/loops/scalar.h b/libnd4j/include/loops/scalar.h index dc3a5b16c..f7333d57d 100755 --- a/libnd4j/include/loops/scalar.h +++ b/libnd4j/include/loops/scalar.h @@ -58,27 +58,77 @@ namespace functions { template __host__ - static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, void* vscalar, void *vextraParams, int *allocPointer); + static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hxShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hzShapeInfo, + const void* vscalar, + void *vextraParams, + int *allocPointer); template __host__ - static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); __host__ - static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *result, Nd4jLong *resultShapeInfo, Nd4jLong *hzShapeInfo, void* scalar, void *extraParams); + static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong 
*hxShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, const Nd4jLong *hzShapeInfo, + const void* scalar, + void *extraParams); __host__ - static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else template - static void transform(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int 
dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t len, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t len, uint64_t start, uint64_t stop); @@ -101,7 +151,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); /** @@ -117,7 +171,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t len, const uint64_t start, const uint64_t stop); + static void transform(const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t len, uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/scalar_bool.h b/libnd4j/include/loops/scalar_bool.h index 0b26531b2..4992df5a1 100644 --- a/libnd4j/include/loops/scalar_bool.h 
+++ b/libnd4j/include/loops/scalar_bool.h @@ -58,43 +58,106 @@ namespace functions { template __device__ - static void transformCuda(void* scalar, void *vy, Nd4jLong *shapeInfo, void *vparams, void *vresult, Nd4jLong *resultShapeInfo, int *allocationBuffer); + static void transformCuda(const void* scalar, + const void *vy, const Nd4jLong *shapeInfo, + void *vparams, + void *vresult, const Nd4jLong *resultShapeInfo, + int *allocationBuffer); template __device__ - static void transformCuda(Nd4jLong n, void* vx, void *vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer); + static void transformCuda(Nd4jLong n, + const void* vx, const void *vy, Nd4jLong yEWS, + void *vparams, + void *vz, Nd4jLong zEWS, + int *allocationBuffer); template __device__ - static void transformCuda(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, void *vscalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + 
const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void* vscalar, void *vextraParams, int *allocPointer); + static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void* vscalar, + void *vextraParams, + int *allocPointer); __host__ - static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void* scalar, void *extraParams); + static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void* scalar, + const void *extraParams); __host__ - static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); - + static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); /* #include "cuda/scalar_temp.cu" */ #else template - static void transform(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void 
transform(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); @@ -117,7 +180,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, 
const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); /** @@ -133,7 +200,10 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/scalar_int.h b/libnd4j/include/loops/scalar_int.h index dde7af4c7..c3a53199e 100644 --- a/libnd4j/include/loops/scalar_int.h +++ b/libnd4j/include/loops/scalar_int.h @@ -58,40 +58,104 @@ namespace functions { template __device__ - static void transformCuda(void* scalar, void *vy, Nd4jLong *shapeInfo, void *vparams, void *vresult, Nd4jLong *resultShapeInfo, int *allocationBuffer); + static void transformCuda(const void* scalar, + const void *vy, const Nd4jLong *shapeInfo, + void *vparams, + void *vresult, const Nd4jLong *resultShapeInfo, + int *allocationBuffer); template __device__ - static void transformCuda(Nd4jLong n, void* vx, void *vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer); + static void transformCuda(Nd4jLong n, + const void* vx, const void *vy, Nd4jLong yEWS, + void *vparams, + void *vz, Nd4jLong zEWS, + int *allocationBuffer); template __device__ - static void transformCuda(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, void *vscalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const 
void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void* vscalar, void *vextraParams, int *allocPointer); + static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void* vscalar, + void *vextraParams, + int *allocPointer); __host__ - static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void* scalar, void *extraParams); + static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void* scalar, + void *extraParams); __host__ - static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong 
*tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); - + static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else template - static void transform(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const 
void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, + uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); @@ -114,7 +178,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); /** @@ -130,7 +198,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/special_kernels.h b/libnd4j/include/loops/special_kernels.h index 52cdb7fdd..209d35120 100644 --- a/libnd4j/include/loops/special_kernels.h +++ b/libnd4j/include/loops/special_kernels.h @@ -36,44 +36,44 @@ namespace sd { template - _CUDA_H void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong *xShapeInfo, Nd4jLong length, long idx); + _CUDA_H void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, const Nd4jLong *xShapeInfo, Nd4jLong length, long idx); template - _CUDA_H void 
fillDimensionalIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dX, void *dZ, Nd4jLong *zShapeInfo, Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOffsets); + _CUDA_H void fillDimensionalIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dX, void *dZ, const Nd4jLong *zShapeInfo, const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadOffsets); template _CUDA_H void convertToHalfGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong n, half *dz); template - _CUDA_H void tearKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jPointer *targets, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + _CUDA_H void tearKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, + Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); template _CUDA_H void shuffleKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void **vdX, Nd4jLong **xShapeInfo, void **vdZ, int N, - int *shuffleMap, Nd4jLong **tadOnlyShapeInfo, Nd4jLong **tadOffsets); + int *shuffleMap, Nd4jLong** tadOnlyShapeInfo, Nd4jLong** tadOffsets); template _CUDA_H void convertHalfsToGeneric(dim3 &launchDims, cudaStream_t *stream, half *dx, Nd4jLong n, void *dz); template _CUDA_H void concatKernelVStackGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, - Nd4jPointer *inputShapeInfos, void *vz, Nd4jLong *zShapeInfo); + Nd4jPointer *inputShapeInfos, void *vz, Nd4jLong const* zShapeInfo); template _CUDA_H void concatKernelScalarGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, void *vresult); template _CUDA_H void concatKernelHStackGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, - Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong *resultShapeInfo); + Nd4jPointer 
*inputShapeInfos, void *vresult, Nd4jLong const* resultShapeInfo); template _CUDA_H void concatKernelGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, - Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong *resultShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers, Nd4jLong *zTadShape, Nd4jLong *zOffsets); + Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong const* resultShapeInfo, + Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers, Nd4jLong const* zTadShape, Nd4jLong const* zOffsets); template _CUDA_H void pullRowsKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, void *vz, Nd4jLong n, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets); template _CUDA_H void averagingKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void **vdx, void *vdz, int n, Nd4jLong length, bool propagate); @@ -85,20 +85,19 @@ namespace sd { _CUDA_H void flattenKernelGeneric(dim3& launchDims, cudaStream_t *stream, Nd4jPointer *extraPointers, int dOffset, char order, void *vz, Nd4jLong *zShapeInfo, void *vy, Nd4jLong *yShapeInfo); template - _CUDA_H void tileKernelH(void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, cudaStream_t *stream); + _CUDA_H void tileKernelH(void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, cudaStream_t *stream); template - _CUDA_H void tileKernelHH(void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream); - + _CUDA_H void tileKernelHH(void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t 
*stream); class NDArray; template - _CUDA_H void setDiagonalValueUpper(void* buffer, Nd4jLong* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); + _CUDA_H void setDiagonalValueUpper(void* buffer, Nd4jLong const* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); template - _CUDA_H void setDiagonalValueLower(void* buffer, Nd4jLong* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); + _CUDA_H void setDiagonalValueLower(void* buffer, Nd4jLong const* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); template - _CUDA_H void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape, cudaStream_t* theStream); + _CUDA_H void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape, cudaStream_t* theStream); } diff --git a/libnd4j/include/loops/summarystatsreduce.h b/libnd4j/include/loops/summarystatsreduce.h index 0a429cd2b..1ab06a11b 100755 --- a/libnd4j/include/loops/summarystatsreduce.h +++ b/libnd4j/include/loops/summarystatsreduce.h @@ -270,7 +270,7 @@ namespace functions { #ifdef __CUDACC__ - static inline _CUDA_D Z startingValue(X *input) { + static inline _CUDA_D Z startingValue(X const* input) { return static_cast(0); } @@ -279,62 +279,51 @@ namespace functions { template - static _CUDA_D void transform(void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_D void transform(void const* dx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void 
*reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets); - static _CUDA_D void transform(const int opNum, void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_D void transform(const int opNum, void const* dx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets); - static _CUDA_H void execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer); - static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer); - static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer); + static _CUDA_H void execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool 
biasCorrected, void *reductionBuffer); + static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer); + static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer); #else static Z execScalar(int opNum, - bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer); + bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer); static void exec(int opNum, - bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, int dimensionLength); + bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength); template static Z execScalar(bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); template static void execScalar(bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer); + 
const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer); template static void exec(bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength); #endif }; } diff --git a/libnd4j/include/loops/transform_any.h b/libnd4j/include/loops/transform_any.h index 22d56a4d3..751328b89 100644 --- a/libnd4j/include/loops/transform_any.h +++ b/libnd4j/include/loops/transform_any.h @@ -57,18 +57,40 @@ class TransformAny { #ifdef __CUDACC__ template - static __device__ void transformCuda(void *vx, Nd4jLong *xShapeInfo, void *params, void *vz, Nd4jLong *zShapeInfo, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *params, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, 
int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; diff --git a/libnd4j/include/loops/transform_bool.h b/libnd4j/include/loops/transform_bool.h index 56a7f8f7e..5553c164f 100644 --- a/libnd4j/include/loops/transform_bool.h +++ b/libnd4j/include/loops/transform_bool.h @@ -57,27 +57,40 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, 
Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; } diff --git a/libnd4j/include/loops/transform_float.h b/libnd4j/include/loops/transform_float.h index 1d9b6fb71..4264278ba 100644 --- 
a/libnd4j/include/loops/transform_float.h +++ b/libnd4j/include/loops/transform_float.h @@ -57,51 +57,55 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __device__ void transformCudaLegacy( - int opNum, - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCudaLegacy(int opNum, + const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static __device__ void transformCuda( - Nd4jLong n, - void *dy, - Nd4jLong incy, - void *params, - void *result, - Nd4jLong resultStride, - int *allocationPointer, - void *reductionPointer); + static __device__ void transformCuda(Nd4jLong n, + const void *dy, Nd4jLong incy, + void *params, + void *result, Nd4jLong resultStride, + int *allocationPointer, void *reductionPointer); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void 
*extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; } diff --git a/libnd4j/include/loops/transform_same.h b/libnd4j/include/loops/transform_same.h index cb36ba872..cb069ecc9 100644 --- a/libnd4j/include/loops/transform_same.h +++ b/libnd4j/include/loops/transform_same.h @@ -57,29 +57,42 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong 
*tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t 
threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; } diff --git a/libnd4j/include/loops/transform_strict.h b/libnd4j/include/loops/transform_strict.h index b7ba63e46..903f4e9df 100644 --- a/libnd4j/include/loops/transform_strict.h +++ b/libnd4j/include/loops/transform_strict.h @@ -57,31 +57,44 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + 
const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; diff --git a/libnd4j/include/loops/type_conversions.h b/libnd4j/include/loops/type_conversions.h index 3d113eefc..b56921435 100644 --- a/libnd4j/include/loops/type_conversions.h +++ b/libnd4j/include/loops/type_conversions.h @@ -67,7 +67,7 @@ namespace sd { static _CUDA_H void convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template - static _CUDA_H void convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); + static _CUDA_H void convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); FORCEINLINE static _CUDA_H Nd4jLong estimateQuantizedSize(Nd4jLong rawSize) { if (rawSize <= 0) @@ -115,7 +115,7 @@ namespace sd { } template - __host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz, float threshold); + __host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz, float threshold); template @@ -123,14 +123,14 @@ namespace sd { template - __host__ void 
decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz); + __host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz); template __host__ void cudaEncodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, void *vdx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold); template - __host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *vdz); + __host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *vdz); __global__ void uniformAdd(int *g_data, int *uniforms, int n, int blockOffset, int baseIndex); diff --git a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp index 9689c9cd5..e5d67baf1 100644 --- a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp +++ b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp @@ -59,7 +59,7 @@ namespace sd { } DECLARE_SHAPE_FN(choose) { - Nd4jLong *shape; + Nd4jLong const* shape; int rank; int mode = INT_ARG(0); auto numResults = NDArrayFactory::create(0L); @@ -67,11 +67,11 @@ namespace sd { auto first = INPUT_VARIABLE(0); auto second = INPUT_VARIABLE(1); if(first->lengthOf() > second->lengthOf()) { - shape = first->getShapeInfo(); + shape = first->shapeInfo(); rank = first->rankOf(); } else { - shape = second->getShapeInfo(); + shape = second->shapeInfo(); rank = second->rankOf(); } @@ -79,7 +79,7 @@ namespace sd { } else { auto first = INPUT_VARIABLE(0); - shape = first->getShapeInfo(); + shape = first->shapeInfo(); rank = first->rankOf(); double scalar = T_ARG(0); diff --git a/libnd4j/include/ops/declarable/generic/boolean/where.cpp b/libnd4j/include/ops/declarable/generic/boolean/where.cpp index c72c10d6b..c26179179 100644 --- a/libnd4j/include/ops/declarable/generic/boolean/where.cpp +++ 
b/libnd4j/include/ops/declarable/generic/boolean/where.cpp @@ -99,9 +99,9 @@ namespace sd { for (Nd4jLong i = 0; i < condition->lengthOf(); i++) if (condition->e(i)) numOfTrue++; - Nd4jLong *newShape; - + Nd4jLong const* theNewShape; if (numOfTrue > 0) { + Nd4jLong* newShape; ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); newShape[0] = 2; @@ -114,13 +114,13 @@ namespace sd { newShape[7] = 99; ShapeUtils::updateStridesAndType(newShape, sd::DataType::INT64, 'c'); - newShape = CONSTANT(newShape); + theNewShape = CONSTANT(newShape); } else { - newShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64); + theNewShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64); } - return SHAPELIST(newShape); + return SHAPELIST(theNewShape); } } diff --git a/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp b/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp index 3ddbe57ca..b7635c664 100644 --- a/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp +++ b/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp @@ -34,8 +34,8 @@ namespace ops { BROADCAST_CHECK_EMPTY(x,y,z); - Nd4jLong* zShapeInfo = nullptr; - const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->getShapeInfo(), y->getShapeInfo(), true, zShapeInfo, block.getWorkspace()); + const Nd4jLong* zShapeInfo = nullptr; + const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->shapeInfo(), y->shapeInfo(), true, zShapeInfo, block.getWorkspace()); REQUIRE_TRUE(areShapesBroadcastable, 0, "MULTIPLY OP: the shapes of x %s and y %s are not suitable for broadcast !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(y).c_str()); auto tZ = BroadcastHelper::broadcastApply(sd::BroadcastOpsTuple::Multiply(), x, y, z); @@ -70,8 +70,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto dLdx = OUTPUT_VARIABLE(0); auto dLdy = OUTPUT_VARIABLE(1); - 
Nd4jLong* dLdzShapeInfo = nullptr; - const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->getShapeInfo(), y->getShapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); + const Nd4jLong* dLdzShapeInfo = nullptr; + const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->shapeInfo(), y->shapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); REQUIRE_TRUE(areShapesBroadcastable, 0, "MULTIPLY_BP OP: the shapes of x %s and y %s are not suitable for broadcast !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(y).c_str()); REQUIRE_TRUE(shape::equalsSoft(dLdz->shapeInfo(), dLdzShapeInfo), 0, "MULTIPLY_BP OP: wrong shape of next epsilon array (dLdOut), expected is %s, but got %s instead !", ShapeUtils::shapeAsString(dLdzShapeInfo).c_str(), ShapeUtils::shapeAsString(dLdz).c_str()); @@ -102,7 +102,7 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto yTiled = NDArray(dLdz, false, block.launchContext()); y->tile(yTiled); - std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), dLdz->shapeInfo()); dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) ); yTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx); @@ -111,7 +111,7 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto xTiled = NDArray(dLdz, false, block.launchContext()); x->tile(xTiled); - std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), dLdz->shapeInfo()); dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) ); xTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy); @@ -122,8 +122,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto yTiled = NDArray(dLdz, false, block.launchContext()); x->tile(xTiled); y->tile(yTiled); - std::vector 
axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo()); - std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), dLdz->shapeInfo()); + std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), dLdz->shapeInfo()); dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) ); dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) ); diff --git a/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp b/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp index e2bf723b3..f5fbd4b18 100644 --- a/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp +++ b/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp @@ -64,8 +64,7 @@ CUSTOM_OP_IMPL(percentile, 1, 1, false, 1, -2) { DECLARE_SHAPE_FN(percentile) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); + auto inputShapeInfo = inputShape->at(0); const int keepDims = block.getTArguments()->size() > 2 ? 
T_ARG(2) : 0.; // false is default const int axisArrRank = block.getIArguments()->size(); @@ -80,7 +79,7 @@ DECLARE_SHAPE_FN(percentile) { } std::vector axises = *block.getIArguments(); - Nd4jLong* outputShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShapeInfo), axises, inputShapeInfo, keepDims, false, block.getWorkspace()); + auto outputShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShapeInfo), axises, inputShapeInfo, keepDims, false, block.getWorkspace()); return SHAPELIST(outputShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp b/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp index 5a1ac02c5..8ceb61e18 100644 --- a/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp +++ b/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp @@ -62,8 +62,8 @@ namespace ops { auto dLdx = OUTPUT_VARIABLE(0); auto dLdy = OUTPUT_VARIABLE(1); - Nd4jLong* dLdzShapeInfo = nullptr; - const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->getShapeInfo(), y->getShapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); + const Nd4jLong* dLdzShapeInfo = nullptr; + const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->shapeInfo(), y->shapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); REQUIRE_TRUE(areShapesBroadcastable, 0, "POW_BP OP: the shapes of x %s" " and y %s are not suitable for broadcast !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(y).c_str()); @@ -82,7 +82,7 @@ namespace ops { dLdy->assign(temp); } else { - std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), dLdz->shapeInfo()); dLdy->assign(temp.reduceAlongDimension(reduce::Sum, axesForY)); // dL/dy = sum(c * dL/dz) } @@ -94,7 +94,7 @@ namespace ops { dLdx->assign(temp); // dLdx = a*dL/dz } else { - std::vector axesForX = 
ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), dLdz->shapeInfo()); dLdx->assign(temp.reduceAlongDimension(reduce::Sum, axesForX)); // dLdx = a*dL/dz } diff --git a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp index d7fab45a6..4b77e2a45 100644 --- a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp +++ b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp @@ -26,7 +26,7 @@ namespace sd { namespace ops { CUSTOM_OP_IMPL(decode_bitmap, 2, 1, true, 0, 0) { - auto encoded = INPUT_VARIABLE(1); + const auto encoded = INPUT_VARIABLE(1); auto updates = OUTPUT_VARIABLE(0); helpers::decodeBitmap(block.launchContext(), encoded, updates); diff --git a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp index 6b02f6d70..81831e3fc 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp @@ -94,7 +94,8 @@ namespace sd { } auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { - ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); +// ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); + resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in0)); } return SHAPELIST(resShape); } @@ -117,7 +118,8 @@ namespace sd { } auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { - ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); + resShape = 
ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in1)); +// ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); } return SHAPELIST(resShape); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp index 9d4a00be3..deabe8443 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp @@ -36,7 +36,7 @@ namespace sd { } DECLARE_SHAPE_FN(matrix_diag_part) { - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; auto in = inputShape->at(0); int inRank = shape::rank(in); @@ -49,14 +49,15 @@ namespace sd { outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(lastDimension, ArrayOptions::dataType(in)); } else { - ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(outRank), Nd4jLong); - outShapeInfo[0] = outRank; + Nd4jLong* anShapeInfo; + ALLOCATE(anShapeInfo, block.getWorkspace(), shape::shapeInfoLength(outRank), Nd4jLong); + anShapeInfo[0] = outRank; for(int i = 0; i < outRank - 1; ++i) - outShapeInfo[i + 1] = shape::sizeAt(in, i); - outShapeInfo[outRank] = lastDimension; + anShapeInfo[i + 1] = shape::sizeAt(in, i); + anShapeInfo[outRank] = lastDimension; - ShapeUtils::updateStridesAndType(outShapeInfo, in, shape::order(in)); - outShapeInfo = CONSTANT(outShapeInfo); + ShapeUtils::updateStridesAndType(anShapeInfo, in, shape::order(in)); + outShapeInfo = CONSTANT(anShapeInfo); } return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp index 2268a9e9c..edd10e6ea 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp @@ -38,7 +38,7 @@ namespace sd { DECLARE_SHAPE_FN(matrix_determinant) { 
auto inShape = inputShape->at(0); - Nd4jLong* determinantShape; + Nd4jLong const* determinantShape; int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only @@ -85,7 +85,7 @@ namespace sd { DECLARE_SHAPE_FN(log_matrix_determinant) { auto inShape = inputShape->at(0); - Nd4jLong* determinantShape; + Nd4jLong const* determinantShape; int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only @@ -126,7 +126,7 @@ namespace sd { DECLARE_SHAPE_FN(logdet) { auto inShape = inputShape->at(0); - Nd4jLong* determinantShape; + Nd4jLong const* determinantShape; int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only diff --git a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp index 2cf9156ce..9a351a13f 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp @@ -44,8 +44,8 @@ namespace sd { DECLARE_SHAPE_FN(qr) { auto inShape = inputShape->at(0); - Nd4jLong* shapeQ; - Nd4jLong* shapeR; + Nd4jLong const* shapeQ; + Nd4jLong const* shapeR; int targetRank = shape::rank(inShape); // last two dimensions will be reduced to scalar auto fullMatricies = false; diff --git a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp index 812588710..d745b0209 100644 --- a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp @@ -49,7 +49,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss, 3, 1, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new 
NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray E = (*predictions - *labels).transform(sd::transform::Abs); E *= *weightsBroad; @@ -118,7 +118,7 @@ DECLARE_SHAPE_FN(absolute_difference_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "ABSOLUTE_DIFFERENCE_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -164,7 +164,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray E = *predictions - *labels; @@ -183,7 +183,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -209,7 +209,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, 
false, 0, 1) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -237,7 +237,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp index 10995c90b..4d134f6b1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp @@ -61,7 +61,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss, 3, 1, false, 0, 2) { // perform weights broadcasting/tile to E if it is necessary auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(&E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); // multiply E on weights E *= (*weightsBroad); @@ -141,7 +141,7 @@ DECLARE_SHAPE_FN(cosine_distance_loss) { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); // evaluate output shapeInfo - 
Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); else { // in this case output has the same shape as labels reduced by dim axis @@ -186,11 +186,11 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { REQUIRE_TRUE(labels->isSameShape(predictions), 0, "COSINE_DISTANCE_LOSS_GRAD OP: labels and predictions arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labels).c_str(), ShapeUtils::shapeAsString(predictions).c_str()); // only 4 possible reduction modes exist REQUIRE_TRUE(reductionMode==0 || reductionMode==1 || reductionMode==2 || reductionMode==3, 0, "COSINE_DISTANCE_LOSS_GRAD OP: reduction mode value is not acceptable, possible values are 0, 1, 2, 3, but got %i instead!", reductionMode); - auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(predictions->ordering(), dimensions, predictions->getShapeInfo(), true, false, block.getWorkspace()); + auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(predictions->ordering(), dimensions, predictions->shapeInfo(), true, false, block.getWorkspace()); // weights array can be single scalar or has the same shape as loss, and must be broadcastable to loss shape REQUIRE_TRUE(weights->isScalar() || weights->rankOf() == shape::rank(lossShapeInfo), 0, "COSINE_DISTANCE_LOSS_GRAD OP: weights array should be scalar or have the same rank as loss array, but got %i and %i correspondingly!", weights->rankOf(), shape::rank(lossShapeInfo)); // check whether broadcast operation is possible for weights array - REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->getShapeInfo(), lossShapeInfo), 0, "COSINE_DISTANCE_LOSS_GRAD OP: shapes of weights and loss arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), 
ShapeUtils::shapeAsString(lossShapeInfo).c_str()); + REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->shapeInfo(), lossShapeInfo), 0, "COSINE_DISTANCE_LOSS_GRAD OP: shapes of weights and loss arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); // input dimension can't be larger than labels/predictions/weights rank REQUIRE_TRUE(dim < labels->rankOf(), 0, "COSINE_DISTANCE_LOSS_GRAD OP: input reduction dimension (got %i) must be < labels rank %i!", dim, labels->rankOf()); @@ -199,7 +199,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { // perform weights broadcasting/tile to E if it is necessary auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(&E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); dLdp->assign(-*labels); dLdl->assign(-*predictions); @@ -215,7 +215,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { } else { if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -248,7 +248,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { else { if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, 
axesToReduceAlong, true, false, false); } else @@ -283,7 +283,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { else { if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeights; } diff --git a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp index 7d8eeec3a..fe66387a8 100644 --- a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp @@ -48,7 +48,7 @@ namespace sd { // perform weights broadcasting/tile to logits if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(logits->shapeInfo())); // We first need to convert binary labels to -1/1 labels (as floats) NDArray E = 1.f - (*labels * 2.f - 1.f) * (*logits); @@ -125,7 +125,7 @@ namespace sd { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "HINGE_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -167,7 +167,7 @@ namespace sd { 
// perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(logits->shapeInfo())); // We first need to convert binary labels to -1/1 labels (as floats) NDArray z = (*labels * 2.f - 1.f); @@ -176,7 +176,7 @@ namespace sd { E.applyScalar(scalar::RELU, 0.0f, E); // turn E into gradient mask - NDArray gradientMask(E.getShapeInfo(), block.getWorkspace()); + NDArray gradientMask(E.shapeInfo(), block.getWorkspace()); E.applyTransform(sd::transform::Sign, gradientMask); dLdp->assign(-z * gradientMask); @@ -192,7 +192,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -221,7 +221,7 @@ namespace sd { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -250,7 +250,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + 
std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp index a29bd1cf2..df57092e1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp @@ -50,11 +50,11 @@ CUSTOM_OP_IMPL(huber_loss, 3, 1, false, 1, 1) { // perform weights broadcasting/tile to predictions if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); auto error = *predictions - *labels; error.applyTransform(transform::Abs, error); - NDArray quadratic(error.getShapeInfo(), block.getWorkspace()); + NDArray quadratic(error.shapeInfo(), block.getWorkspace()); error.applyScalar(scalar::MinPairwise, delta, quadratic); NDArray E = quadratic * quadratic * 0.5f + (error - quadratic)*delta; @@ -130,7 +130,7 @@ DECLARE_SHAPE_FN(huber_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "HUBER_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -170,7 +170,7 @@ 
DECLARE_SHAPE_FN(huber_loss) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray diff = *predictions - *labels; NDArray absDiff(diff); @@ -180,10 +180,10 @@ DECLARE_SHAPE_FN(huber_loss) { NDArray E = quadratic * quadratic * 0.5f + (absDiff - quadratic)*delta; - NDArray lteMask(diff.getShapeInfo(), BOOL, true, block.launchContext()); + NDArray lteMask(diff.shapeInfo(), BOOL, true, block.launchContext()); absDiff.applyScalar(scalar::LessThanOrEqual, delta, lteMask); - NDArray gtMask(diff.getShapeInfo(), BOOL, true, block.launchContext()); + NDArray gtMask(diff.shapeInfo(), BOOL, true, block.launchContext()); absDiff.applyScalar(scalar::GreaterThan, delta, gtMask); NDArray signDiff(diff); @@ -207,7 +207,7 @@ DECLARE_SHAPE_FN(huber_loss) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -236,7 +236,7 @@ DECLARE_SHAPE_FN(huber_loss) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -265,7 
+265,7 @@ DECLARE_SHAPE_FN(huber_loss) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } @@ -306,9 +306,9 @@ DECLARE_SHAPE_FN(huber_loss) { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong *dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); - Nd4jLong *dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, block.getWorkspace()); - Nd4jLong *dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); + auto dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); + auto dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, block.getWorkspace()); + auto dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); return SHAPELIST(dLdpShapeInfo, dLdwShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp index 99140a394..e43e7b1d1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp @@ -52,7 +52,7 @@ CUSTOM_OP_IMPL(log_loss, 3, 1, false, 1, 1) { // perform weights broadcasting/tile to predictions if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new 
NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray E = -(*labels)*((*predictions + epsilon).transform(transform::Log)) - (1. - *labels)*(((1. + epsilon) - *predictions).transform(transform::Log)); @@ -127,7 +127,7 @@ DECLARE_SHAPE_FN(log_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "LOG_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -173,7 +173,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray predictPlusEps = *predictions + epsilon; NDArray oneMinusLabels = 1. 
- *labels; @@ -196,7 +196,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -227,7 +227,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -255,7 +255,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeights); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp index 20e03e92b..b39326071 100644 --- a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp @@ -50,10 +50,10 @@ namespace ops { 
// perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(log_predictions)) - weightsBroad = new NDArray(weights->tileToShape(log_predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(log_predictions->shapeInfo())); - NDArray E(labels->getShapeInfo(), block.getWorkspace()); + NDArray E(labels->shapeInfo(), block.getWorkspace()); if (computeFullLoss) labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E); else @@ -130,7 +130,7 @@ namespace ops { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "LOG_POISSON_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -172,14 +172,14 @@ namespace ops { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(log_predictions)) - weightsBroad = new NDArray(weights->tileToShape(log_predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(log_predictions->shapeInfo())); - NDArray E(labels->getShapeInfo(), block.getWorkspace()); + NDArray E(labels->shapeInfo(), block.getWorkspace()); if (computeFullLoss) { labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E); - NDArray rDiv(labels->getShapeInfo(), block.getWorkspace()); + NDArray rDiv(labels->shapeInfo(), block.getWorkspace()); labels->applyScalar(scalar::ReverseDivide, 0.5f, 
rDiv); dLdl->assign(rDiv + labels->transform(transform::Log) + -(*log_predictions)); } else { @@ -200,7 +200,7 @@ namespace ops { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -229,7 +229,7 @@ namespace ops { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -258,7 +258,7 @@ namespace ops { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } @@ -299,9 +299,9 @@ namespace ops { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong *dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); - Nd4jLong *dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, 
block.getWorkspace()); - Nd4jLong *dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); + auto dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); + auto dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, block.getWorkspace()); + auto dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); return SHAPELIST(dLdpShapeInfo, dLdwShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp index f8006a3ed..5a0e20807 100644 --- a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp @@ -128,7 +128,7 @@ namespace sd { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); E *= *weightsBroad; @@ -197,7 +197,7 @@ namespace sd { ShapeUtils::shapeAsString(labelsShapeInfo).c_str(), ShapeUtils::shapeAsString(predictionsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -262,7 +262,7 @@ namespace sd { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); switch (reductionMode) { @@ 
-273,7 +273,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -300,7 +300,7 @@ namespace sd { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -328,7 +328,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp index b0ccf968b..fd00a0364 100644 --- a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp @@ -48,9 +48,9 @@ CUSTOM_OP_IMPL(mean_sqerr_loss, 3, 1, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; 
if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); - NDArray E(labels->getShapeInfo(), false, block.launchContext()); + NDArray E(labels->shapeInfo(), false, block.launchContext()); predictions->applyPairwiseTransform(pairwise::SquaredSubtract, *labels, E); // multiply E on weights @@ -126,7 +126,7 @@ DECLARE_SHAPE_FN(mean_sqerr_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "MEAN_SQERR_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -171,7 +171,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray diff = *predictions - *labels; @@ -191,7 +191,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = 
ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -218,7 +218,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -246,7 +246,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp index 28d66bc93..f2e665bdb 100644 --- a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp @@ -50,7 +50,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss, 3, 1, false, 1, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new 
NDArray(weights->tileToShape(logits->shapeInfo())); // If labelsSmoothing is nonzero, smooth the labels towards 1/2: auto newLabels = labels; @@ -137,7 +137,7 @@ DECLARE_SHAPE_FN(sigm_cross_entropy_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "SIGM_CROSS_ENTROPY_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -181,7 +181,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(logits->shapeInfo())); // If labelsSmoothing is nonzero, smooth the labels towards 1/2: auto newLabels = labels; @@ -211,7 +211,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -240,7 +240,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) *dLdw = 0.; else 
if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -268,7 +268,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeightsScalar); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp index 3ea9ce2bd..f70a58a10 100644 --- a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp @@ -80,7 +80,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss, 3, 1, false, 1, 1) { if(E.rankOf() == 1 && weights->isVector() && weights->rankOf() > 1) weightsBroad = new NDArray(weights->reshape(weights->ordering(), {weights->lengthOf()})); else - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); } // multiply E on weights @@ -158,7 +158,7 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss) { REQUIRE_TRUE(shape::shapeEquals(logitsShapeInfo, labelsShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS OP: labels and logits arrays must have the same shapes, but got 
%s and %s correspondingly!", ShapeUtils::shapeAsString(labelsShapeInfo).c_str(), ShapeUtils::shapeAsString(logitsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -207,11 +207,11 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { REQUIRE_TRUE(labels->isSameShape(logits), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: labels and logits arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labels).c_str(), ShapeUtils::shapeAsString(logits).c_str()); // only 4 possible reduction modes exist REQUIRE_TRUE(reductionMode==0 || reductionMode==1 || reductionMode==2 || reductionMode==3, 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: reduction mode value is not acceptable, possible values are 0, 1, 2, 3, but got %i instead!", reductionMode); - auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(logits->ordering(), dimensions, logits->getShapeInfo(), false, false, block.getWorkspace()); + auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(logits->ordering(), dimensions, logits->shapeInfo(), false, false, block.getWorkspace()); // weights array can be single scalar or has the same shape as loss, and must be broadcastable to loss shape REQUIRE_TRUE(weights->isScalar() || weights->rankOf() == shape::rank(lossShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: weights array should be scalar or have the same rank as loss array, but got %i and %i correspondingly!", weights->rankOf(), shape::rank(lossShapeInfo)); // check whether broadcast operation is possible for weights array - REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->getShapeInfo(), lossShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: shapes of weights and loss 
arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); + REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->shapeInfo(), lossShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: shapes of weights and loss arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); // smoothing is possible for rank of logits/labels > 1 REQUIRE_TRUE(labels->rankOf() > 1 || (labels->rankOf() == 1 && labelsSmoothing == 0.), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: smoothing is not possible when rank of labels/ logits = 1 !"); @@ -220,7 +220,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { NDArray* cLabels = new NDArray(labels->cast(weights->dataType())); NDArray* newLabels = cLabels; if(labelsSmoothing != 0.) { - newLabels = new NDArray(labels->getShapeInfo(), dLdl->dataType(), false, block.launchContext()); + newLabels = new NDArray(labels->shapeInfo(), dLdl->dataType(), false, block.launchContext()); newLabels->assign((1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1)); } @@ -240,7 +240,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { // perform weights broadcasting/tile to E if it is necessary auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(&E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); dimensions = ShapeUtils::evalDimsToExclude(dLdp->rankOf(), dimensions); @@ -257,7 +257,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { dLdl->applyBroadcast(sd::broadcast::Multiply, dimensions, *weightsBroad, *dLdl); if(weights != weightsBroad) { - std::vector axesToReduceAlong = 
ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -293,7 +293,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { dLdl->applyBroadcast(sd::broadcast::Multiply, dimensions, temp, *dLdl); if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -330,7 +330,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { dLdl->applyBroadcast(sd::broadcast::Multiply, dimensions, temp, *dLdl); if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeights; } diff --git a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp index e69b370ca..56684c569 100644 --- a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp @@ -224,7 +224,7 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) { const bool keepUnitiesInShape = inRank == mean->rankOf(); // inverse batch size 1/N - const float Ninv = 1.f * shape::tadLength(input->getShapeInfo(), axes.data(), axes.size()) / 
input->lengthOf(); + const float Ninv = 1.f * shape::tadLength(input->shapeInfo(), axes.data(), axes.size()) / input->lengthOf(); // input - mean NDArray xMinusMean(input); // empty array with same shape as input @@ -322,8 +322,8 @@ DECLARE_TYPES(batchnorm_bp) { DECLARE_SHAPE_FN(batchnorm_bp) { - Nd4jLong* inShapeInfo = inputShape->at(0); - Nd4jLong* meanShapeInfo = inputShape->at(1); + Nd4jLong const* inShapeInfo = inputShape->at(0); + Nd4jLong const* meanShapeInfo = inputShape->at(1); const bool applyScale = (bool)INT_ARG(0); const bool applyOffset = (bool)INT_ARG(1); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp index 27081b545..881e60105 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp @@ -98,7 +98,7 @@ DECLARE_SHAPE_FN(conv1d) { auto inputShapeInfo = inputShape->at(0); auto weightsShapeInfo = inputShape->at(1); - Nd4jLong* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; + Nd4jLong const* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast(shape::sizeAt(weightsShapeInfo, 0)); // filter(kernel) width int sW = INT_ARG(1); // strides width @@ -240,8 +240,8 @@ DECLARE_SHAPE_FN(conv1d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW) auto weightsShapeInfo = inputShape->at(1); // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next + Nd4jLong const* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + Nd4jLong const* gradOShapeInfo = block.width() > 3 ? 
inputShape->at(3) : inputShape->at(2); // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next const int rank = 3; REQUIRE_TRUE(inputShapeInfo[0] == rank, 0, "CUSTOM CONV1D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp index 0657f6dc2..889a01b9a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp @@ -300,10 +300,10 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) { DECLARE_SHAPE_FN(conv3dnew_bp) { - Nd4jLong* inputShapeInfo = inputShape->at(0); // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW) - Nd4jLong* weightsShapeInfo = inputShape->at(1); // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next + auto inputShapeInfo = inputShape->at(0); // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW) + auto weightsShapeInfo = inputShape->at(1); // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC] + Nd4jLong const* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + Nd4jLong const* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next int kD = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast(shape::sizeAt(weightsShapeInfo, 0));// filter(kernel) depth int kH = INT_ARG(1) > 0 ? 
INT_ARG(1) : static_cast(shape::sizeAt(weightsShapeInfo, 1));// filter(kernel) height diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp index 8d6c0e3a7..e0440692b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp @@ -264,8 +264,8 @@ DECLARE_SHAPE_FN(deconv2d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCDHW) auto weightsShapeInfo = inputShape->at(1); // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCDHW), epsilon_next + Nd4jLong const* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + auto gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCDHW), epsilon_next const int rank = 4; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DECONV2D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, shape::rank(inputShapeInfo)); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp index ab6e49836..7c68ee74c 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp @@ -284,8 +284,8 @@ DECLARE_SHAPE_FN(deconv3d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW) auto weightsShapeInfo = inputShape->at(1); // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? 
inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next + auto biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + auto gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next const int rank = 5; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DECONV3D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, shape::rank(inputShapeInfo)); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp index 30580e7a6..744512a13 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp @@ -74,10 +74,9 @@ CUSTOM_OP_IMPL(depthwise_conv2d, 2, 1, false, 0, 9) { ->setAllowedOutputTypes({ALL_FLOATS}); } DECLARE_SHAPE_FN(depthwise_conv2d) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) - Nd4jLong* weightsShapeInfo = inputShape->at(1); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] - Nd4jLong* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; // [oC] = iC*mC + auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) + auto weightsShapeInfo = inputShape->at(1); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] + auto biasShapeInfo = block.width() > 2 ? 
inputShape->at(2) : nullptr; // [oC] = iC*mC const int rank = 4; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DEPTHWISECONV2D OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]); @@ -196,11 +195,10 @@ CUSTOM_OP_IMPL(depthwise_conv2d_bp, 3, 2, false, 0, 9) { ////////////////////////////////////////////////////////////////////// DECLARE_SHAPE_FN(depthwise_conv2d_bp) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); - Nd4jLong* weightsShapeInfo = inputShape->at(1); - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); + auto inputShapeInfo = inputShape->at(0); + auto weightsShapeInfo = inputShape->at(1); + auto biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; + auto gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); const int rank = 4; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DEPTHWISECONV2D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, shape::rank(inputShapeInfo)); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp index ea1193400..c3ecddf53 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp @@ -98,8 +98,6 @@ namespace ops { std::vector strides(4); std::vector rates(4); - Nd4jLong *newShape; - if (block.width() > 2) { auto r = INPUT_VARIABLE(2); auto s = INPUT_VARIABLE(3); @@ -109,7 +107,7 @@ namespace ops { rates = r->template asVectorT(); } else { if (block.numI() < 9) { - newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); + auto newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); return SHAPELIST(newShape); } @@ -129,7 +127,7 @@ namespace ops { 
helpers::dilation_hw(block.launchContext(), input, weights, strides, rates, isSameShape, &sH, &sW, &pH, &pW, &dH, &dW, &oH, &oW); std::array shape = {{bS, oH, oW, iC}}; - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp index 52960c3fc..0f7bdde10 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp @@ -72,10 +72,9 @@ CUSTOM_OP_IMPL(pointwise_conv2d, 2, 1, false, 0, 0) { DECLARE_SHAPE_FN(pointwise_conv2d) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) - Nd4jLong* weightsShapeInfo = inputShape->at(1); // [1, 1, iC, oC], [oC, iC, 1, 1], [oC, 1, 1, iC] - Nd4jLong* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; // [oC] + auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) + auto weightsShapeInfo = inputShape->at(1); // [1, 1, iC, oC], [oC, iC, 1, 1], [oC, 1, 1, iC] + auto biasShapeInfo = block.width() > 2 ? 
inputShape->at(2) : nullptr; // [oC] const int rank = 4; REQUIRE_TRUE(inputShapeInfo[0] == rank, 0, "CUSTOM POINTWISECONV2D OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp index a804abafa..d887d7c2a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp @@ -106,8 +106,8 @@ DECLARE_SHAPE_FN(sconv2d) { auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) auto weightsDShapeInfo = inputShape->at(1); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] - Nd4jLong* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] - Nd4jLong* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr + Nd4jLong const* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] + Nd4jLong const* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr if(block.width() == 3) if(inputShape->at(2)[0] == 4) @@ -306,8 +306,8 @@ DECLARE_SHAPE_FN(sconv2d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) auto gradOShapeInfo = inputShape->at(1); // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next auto weightsDShapeInfo = inputShape->at(2); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] - Nd4jLong* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] - Nd4jLong* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr + Nd4jLong const* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] + Nd4jLong const* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr if(block.width() == 4) { if(inputShape->at(3)[0] == 4) diff --git 
a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp index bd0cf329a..c80608e03 100644 --- a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp @@ -40,7 +40,7 @@ namespace ops { if(outputWeights){ weights = OUTPUT_VARIABLE(1); }else{ - auto weightShape = ShapeUtils::evalShapeForMatmul(keys->getShapeInfo(), queries->getShapeInfo(), true, false); + auto weightShape = ShapeUtils::evalShapeForMatmul(keys->shapeInfo(), queries->shapeInfo(), true, false); weights = new NDArray('c', weightShape, values->dataType(), block.launchContext()); } @@ -164,7 +164,7 @@ namespace ops { if(normalization) factor = sqrt((double)keys->sizeAt(-2)); - auto weightShape = ShapeUtils::evalShapeForMatmul(keys->getShapeInfo(), queries->getShapeInfo(), true, false); + auto weightShape = ShapeUtils::evalShapeForMatmul(keys->shapeInfo(), queries->shapeInfo(), true, false); sd::ops::matmul mmul; NDArray preSoftmax('c', weightShape, values->dataType(), block.launchContext()); @@ -188,7 +188,7 @@ namespace ops { softmax.execute({&preSoftmax}, {&weights},{}, {-2}, {}); sd::ops::matmul_bp mmul_bp; - NDArray dLdw(weights.getShapeInfo(), block.workspace()); + NDArray dLdw(weights.shapeInfo(), block.workspace()); mmul_bp.execute({values, &weights, eps}, std::vector{dLdv, &dLdw}, {}, {}, {}); NDArray dLds(preSoftmax.shapeInfo(), block.workspace()); diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp index d92c27442..31dd72fc3 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp @@ -95,8 +95,8 @@ DECLARE_SYN(maxpool, maxpool2d); DECLARE_SHAPE_FN(maxpool2d) { //NDArray *x = block.getVariables().at(0)->getNDArray(); - Nd4jLong* inShape = 
inputShape->at(0); - Nd4jLong* shapeOf = shape::shapeOf(inShape); + auto inShape = inputShape->at(0); + auto shapeOf = shape::shapeOf(inShape); // 0 - number of dimensions; 1,2 - kernel Height/Width; 3,4 - stride Height/Width; 5,6 - pad Height/Width; 7,8 - dilation Height/Width; 9,10 - input Height/Width; 11 - batch size; 12 - input depth; 13 - same mode; int kH = INT_ARG(0); int kW = INT_ARG(1); diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp index 3fd5f9c51..d1b5928b6 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp @@ -106,7 +106,7 @@ DECLARE_SHAPE_FN(maxpool3dnew) { REQUIRE_TRUE(dD != 0 && dH != 0 && dW != 0, 0, "MAXPOOL3DNEW op: dilation must not be zero, but got instead {%i, %i, %i}", dD, dH, dW); - Nd4jLong* inputShapeInfo = inputShape->at(0); + auto inputShapeInfo = inputShape->at(0); int idxID, idxIC; if(isNCDHW) { idxID = 2; idxIC = 1;} diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp index 4c9319ca1..adcd40daa 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp @@ -187,7 +187,7 @@ CUSTOM_OP_IMPL(pnormpool2d_bp, 2, 1, false, 1, 10) { // NDArray* columns = columnsWrongShape.permute({0, 1, 4, 5, 2, 3}); // [bS, iC, oH, oW, kH, kW] -> [bS, iC, kH, kW, oH, oW] // NDArray* gradOVector = gradO->reshape('c', {(int) gradO->lengthOf(), 1}); // NDArray* columns2d = columnsWrongShape.reshape('c', {bS*iC*oH*oW, kH*kW}); - // NDArray pNorm(columns2d->getShapeInfo(), block.getWorkspace()); + // NDArray pNorm(columns2d->shapeInfo(), block.getWorkspace()); // input->template applyTransform>(columns, std::vector({(T)kH, (T)kW, (T)sH, (T)sW, (T)pH, (T)pW, (T)dH, (T)dW, (T)0.f, 
(T)0.f}).data()); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp index 33fd5e8ea..d03f568b5 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp @@ -214,10 +214,10 @@ DECLARE_SHAPE_FN(dynamic_bidirectional_rnn) { hFWFinalPrevShapeInfo[2] = numUnitsFW; hBWFinalPrevShapeInfo[2] = numUnitsBW; - ShapeUtils::updateStridesAndType(hFWShapeInfo, x->getShapeInfo(), x->ordering()); - ShapeUtils::updateStridesAndType(hBWShapeInfo, x->getShapeInfo(), x->ordering()); - ShapeUtils::updateStridesAndType(hFWFinalPrevShapeInfo, x->getShapeInfo(), x->ordering()); - ShapeUtils::updateStridesAndType(hBWFinalPrevShapeInfo, x->getShapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hFWShapeInfo, x->shapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hBWShapeInfo, x->shapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hFWFinalPrevShapeInfo, x->shapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hBWFinalPrevShapeInfo, x->shapeInfo(), x->ordering()); return SHAPELIST(CONSTANT(hFWShapeInfo), CONSTANT(hBWShapeInfo), CONSTANT(hFWFinalPrevShapeInfo), CONSTANT(hBWFinalPrevShapeInfo)); } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp index 41696638d..9836d65ce 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp @@ -109,8 +109,8 @@ DECLARE_SHAPE_FN(dynamic_rnn) { auto WhShapeInfo = inputShape->at(2); // hidden-to-hidden weights, [numUnits x numUnits] auto bShapeInfo = inputShape->at(3); // biases for, [2*numUnits] - Nd4jLong* h0ShapeInfo = nullptr; // initial cell output (at time step = 0) [bS x 
numUnits] - Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep + Nd4jLong const* h0ShapeInfo = nullptr; // initial cell output (at time step = 0) [bS x numUnits] + Nd4jLong const* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep const int timeMajor = block.getIArguments()->size() > 0 ? INT_ARG(0) : 0; // if true then [time, bS, ...], else [bS, time, ...] diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp index dee9a7c88..a0b1e707b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp @@ -91,7 +91,7 @@ DECLARE_SHAPE_FN(gru) { REQUIRE_TRUE(Wh->isSameShape(whCorrectShape), 0, "GRU operation: wrong shape of hidden-to-hidden weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(whCorrectShape).c_str(), ShapeUtils::shapeAsString(Wh).c_str()); REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU operation: wrong shape of biases array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); - auto* hShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); + auto hShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); return SHAPELIST(hShapeInfo); } @@ -173,11 +173,11 @@ DECLARE_SHAPE_FN(gru_bp) { REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU_BP operation: wrong shape of biases array, expected is %s, but got %s instead !", 
ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); REQUIRE_TRUE(dLdh->isSameShape(hCorrectShape),0, "GRU_BP operation: wrong shape of gradient vs. ff output, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(hCorrectShape).c_str(), ShapeUtils::shapeAsString(dLdh).c_str()); - Nd4jLong* dLdxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), x->getShapeInfo()); - Nd4jLong* dLdhIShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), hI->getShapeInfo()); - Nd4jLong* dLdWxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wx->getShapeInfo()); - Nd4jLong* dLdWhShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wh->getShapeInfo()); - Nd4jLong* dLdbShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), b->getShapeInfo()); + auto dLdxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), x->shapeInfo()); + auto dLdhIShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), hI->shapeInfo()); + auto dLdWxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wx->shapeInfo()); + auto dLdWhShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wh->shapeInfo()); + auto dLdbShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), b->shapeInfo()); return SHAPELIST(dLdxShapeInfo, dLdhIShapeInfo, dLdWxShapeInfo, dLdWhShapeInfo, dLdbShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp index 3225f3f74..1fd7ec8cc 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp @@ -113,7 +113,7 @@ DECLARE_SHAPE_FN(lstmBlock) { } ShapeUtils::updateStridesAndType(s, x, 'c'); 
- Nd4jLong *s1 = CONSTANT(s); + auto s1 = CONSTANT(s); //7 outputs, all same shape/type return SHAPELIST(s1, s1, s1, s1, s1, s1, s1); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp index 333854ba3..55d3a6b7a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp @@ -115,7 +115,7 @@ DECLARE_SHAPE_FN(lstmBlockCell) { ShapeUtils::updateStridesAndType(s, xt, 'c'); - Nd4jLong *s1 = CONSTANT(s); + auto s1 = CONSTANT(s); //7 outputs, all same shape: z, i, f, o, h, c, y return SHAPELIST(s1, s1, s1, s1, s1, s1, s1); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp index 871291165..a5c8b8d28 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp @@ -334,7 +334,7 @@ DECLARE_SHAPE_FN(lstmLayer) { else type = sd::DataType::FLOAT32; - std::vector shapes; + auto shapes = SHAPELIST(); // evaluate h shape (output) if(retFullSeq) { @@ -362,7 +362,7 @@ DECLARE_SHAPE_FN(lstmLayer) { hShape = {sL, 2, bS, nOut}; } - shapes.push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hShape)); + shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hShape)); } // evaluate hL shape (output at last step) @@ -375,10 +375,10 @@ DECLARE_SHAPE_FN(lstmLayer) { else hLShape = {2, bS, nOut}; - shapes.push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hLShape)); + shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hLShape)); if(retLastC) // cL and hL have same shapes - shapes.push_back(shapes.back()); + shapes->push_back(shapes->at(shapes->size() - 1)); } // evaluate cL 
shape (cell state at last step) @@ -391,10 +391,10 @@ DECLARE_SHAPE_FN(lstmLayer) { else cLShape = {2, bS, nOut}; - shapes.push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), cLShape)); + shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), cLShape)); } - return new ShapeList(shapes); + return shapes; } @@ -785,20 +785,20 @@ DECLARE_SHAPE_FN(lstmLayer_bp) { const auto cI = hasInitC ? INPUT_VARIABLE(count++) : nullptr; // initial cell state const auto Wp = hasPH ? INPUT_VARIABLE(count++) : nullptr; // peephole weights - std::vector outShapes = {x->getShapeInfo(), Wx->getShapeInfo(), Wr->getShapeInfo()}; + auto outShapes = SHAPELIST(x->shapeInfo(), Wx->shapeInfo(), Wr->shapeInfo()); if(b != nullptr) - outShapes.push_back(b->getShapeInfo()); + outShapes->push_back(b->shapeInfo()); if(seqLen != nullptr) - outShapes.push_back(seqLen->getShapeInfo()); + outShapes->push_back(seqLen->shapeInfo()); if(hI != nullptr) - outShapes.push_back(hI->getShapeInfo()); + outShapes->push_back(hI->shapeInfo()); if(cI != nullptr) - outShapes.push_back(cI->getShapeInfo()); + outShapes->push_back(cI->shapeInfo()); if(Wp != nullptr) - outShapes.push_back(Wp->getShapeInfo()); + outShapes->push_back(Wp->shapeInfo()); - return new ShapeList(outShapes); + return outShapes; } } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp index 4f24219bd..645541d6b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp @@ -154,7 +154,7 @@ DECLARE_SHAPE_FN(lstmLayerCell) { const auto hI = INPUT_VARIABLE(count++); // initial output const auto cI = INPUT_VARIABLE(count); // initial cell state - return new ShapeList({hI->getShapeInfo(), cI->getShapeInfo()}); + return new ShapeList({hI->shapeInfo(), cI->shapeInfo()}); } 
////////////////////////////////////////////////////////////////////////// @@ -319,18 +319,18 @@ DECLARE_SHAPE_FN(lstmLayerCellBp) { const auto cI = INPUT_VARIABLE(count++); // initial cell state const auto Wp = hasPH ? INPUT_VARIABLE(count) : nullptr; // peephole weights - std::vector shapes = {x->getShapeInfo(), Wx->getShapeInfo(), Wr->getShapeInfo()}; + auto shapes = SHAPELIST(x->shapeInfo(), Wx->shapeInfo(), Wr->shapeInfo()); if(b != nullptr) - shapes.push_back(b->getShapeInfo()); + shapes->push_back(b->shapeInfo()); - shapes.push_back(hI->getShapeInfo()); - shapes.push_back(cI->getShapeInfo()); + shapes->push_back(hI->shapeInfo()); + shapes->push_back(cI->shapeInfo()); if(Wp != nullptr) - shapes.push_back(Wp->getShapeInfo()); + shapes->push_back(Wp->shapeInfo()); - return new ShapeList(shapes); + return shapes; } } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp index 9b78a5c56..84dd6356a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp @@ -67,7 +67,7 @@ CUSTOM_OP_IMPL(sru, 5, 2, false, 0, 0) { // xm = x * mask auto xm = x; if(mask) { - xm = new NDArray(x->getShapeInfo(), true, block.launchContext()); + xm = new NDArray(x->shapeInfo(), true, block.launchContext()); x->applyBroadcast(broadcast::Multiply, {0, 1}, *mask, *xm); } @@ -92,7 +92,7 @@ DECLARE_SHAPE_FN(sru) { auto wShapeInfo = inputShape->at(1); // W, 2d tensor of weights [3*inSize x inSize] auto bShapeInfo = inputShape->at(2); // B, row of biases with twice length [2*inSize] auto c0ShapeInfo = inputShape->at(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0 - Nd4jLong* maskShapeInfo = block.width() > 4 ? inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] + auto maskShapeInfo = block.width() > 4 ? 
inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] const int rank = xShapeInfo[0]; // = 3 const int bS = xShapeInfo[1]; @@ -367,7 +367,7 @@ DECLARE_SHAPE_FN(sru_bi) { auto wShapeInfo = inputShape->at(1); auto bShapeInfo = inputShape->at(2); auto c0ShapeInfo = inputShape->at(3); - Nd4jLong* maskShapeInfo = block.width() > 4 ? inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] + auto maskShapeInfo = block.width() > 4 ? inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] const int rank = xShapeInfo[0]; // = 3 const Nd4jLong time = xShapeInfo[1]; @@ -465,7 +465,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { auto ctShapeInfo = inputShape->at(4); auto inGradC0ShapeInfo = inputShape->at(5); auto inGradHtShapeInfo = inputShape->at(6); - Nd4jLong* maskShapeInfo = block.width() > 7 ? inputShape->at(7) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] + auto maskShapeInfo = block.width() > 7 ? inputShape->at(7) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] // input shapes validation const int rank = xShapeInfo[0]; @@ -777,7 +777,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { // } // static NDArray sigmoid_(const NDArray& arr) { -// NDArray result(arr.getShapeInfo(), false, arr.getContext()); +// NDArray result(arr.shapeInfo(), false, arr.getContext()); // (const_cast(arr)).applyTransform(transform::Sigmoid, &result); // return result; // } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp index bc27c08f6..fbe604a31 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp @@ -147,9 +147,9 @@ DECLARE_SHAPE_FN(static_bidirectional_rnn) { auto WhBWShapeInfo = inputShape->at(5); // hidden-to-hidden weights for backward RNN, [numUnitsBW x 
numUnitsBW] auto bBWShapeInfo = inputShape->at(6); // biases for backward RNN, [2*numUnitsBW] - Nd4jLong* h0FWShapeInfo = nullptr; // initial cell output for forward RNN (at time step = 0) [bS x numUnitsFW] - Nd4jLong* h0BWShapeInfo = nullptr; // initial cell output for backward RNN (at time step = 0) [bS x numUnitsBW] - Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep + Nd4jLong const* h0FWShapeInfo = nullptr; // initial cell output for forward RNN (at time step = 0) [bS x numUnitsFW] + Nd4jLong const* h0BWShapeInfo = nullptr; // initial cell output for backward RNN (at time step = 0) [bS x numUnitsBW] + Nd4jLong const* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep switch(block.width()) { case 8: diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp index 4100f6745..26d2e0818 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp @@ -90,8 +90,8 @@ DECLARE_SHAPE_FN(static_rnn) { auto WhShapeInfo = inputShape->at(2); // hidden-to-hidden weights, [numUnits x numUnits] auto bShapeInfo = inputShape->at(3); // biases for, [2*numUnits] - Nd4jLong* h0ShapeInfo = nullptr; // initial cell output (at time step = 0) [bS x numUnits] - Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep + const Nd4jLong* h0ShapeInfo = nullptr; // initial cell output (at time step 
= 0) [bS x numUnits] + const Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep if(block.width() == 5) { if (inputShape->at(4)[0] == 2) diff --git a/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp b/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp index 94a4a0ca4..c76b79b7b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp @@ -51,7 +51,7 @@ namespace sd { auto weightsShape = inputShape->at(1); auto outputShape = ShapeUtils::matrixProductShape(inShape, weightsShape, false, false, ArrayOptions::dataType(inShape), block.getWorkspace()); - return SHAPELIST(CONSTANT(outputShape)); + return SHAPELIST(outputShape); } DECLARE_TYPES(relu_layer) { diff --git a/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp b/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp index dbabad395..5b36ee0e5 100644 --- a/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp @@ -73,7 +73,7 @@ namespace sd { auto outputShape = ShapeUtils::matrixProductShape(inputShape->at(0), weightsShape, false, false, ArrayOptions::dataType(inputShape->at(0)), block.getWorkspace()); - return SHAPELIST(CONSTANT(outputShape)); + return SHAPELIST(outputShape); } DECLARE_TYPES(xw_plus_b) { @@ -121,7 +121,6 @@ namespace sd { } DECLARE_SHAPE_FN(xw_plus_b_bp) { - Nd4jLong* xShapeInfo; Nd4jLong* wShapeInfo; Nd4jLong* bShapeInfo; @@ -129,7 +128,6 @@ namespace sd { COPY_SHAPE(inputShape->at(0), xShapeInfo); COPY_SHAPE(inputShape->at(1), wShapeInfo); COPY_SHAPE(inputShape->at(2), bShapeInfo); - return SHAPELIST(CONSTANT(xShapeInfo), CONSTANT(wShapeInfo), CONSTANT(bShapeInfo)); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp 
b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp index 2f90adb78..4fc31dd51 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp @@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(broadcast_dynamic_shape, 2, 1, false, 0, 0) { for (Nd4jLong i = 0; i < y->lengthOf(); ++i) yShapeInfo[i + 1] = y->e(i); - Nd4jLong* poinerOnOutShapeInfo = nullptr; + const Nd4jLong* poinerOnOutShapeInfo = nullptr; const bool isBroadcastPossible = ShapeUtils::evalBroadcastShapeInfo(xShapeInfo.data(), yShapeInfo.data(), true, poinerOnOutShapeInfo, block.launchContext()->getWorkspace()); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp index c32ee1ba9..ecddab3bc 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp @@ -80,7 +80,7 @@ namespace sd { DECLARE_SHAPE_FN(non_max_suppression) { auto in = inputShape->at(0); int outRank = shape::rank(in); - Nd4jLong *outputShape = nullptr; + const Nd4jLong *outputShape = nullptr; int maxOutputSize; if (block.width() > 2) @@ -178,7 +178,7 @@ namespace sd { DECLARE_SHAPE_FN(non_max_suppression_v3) { auto in = inputShape->at(0); int outRank = shape::rank(in); - Nd4jLong *outputShape = nullptr; + int maxOutputSize; if (block.width() > 2) @@ -211,7 +211,7 @@ namespace sd { if (len > 0) len = helpers::nonMaxSuppressionV3(block.launchContext(), boxes, scales, maxOutputSize, overlayThreshold, scoreThreshold, nullptr); - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, DataType::INT32); return SHAPELIST(outputShape); } diff --git 
a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp index a8477c63a..30f59ff35 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp @@ -58,7 +58,6 @@ namespace sd { DECLARE_SHAPE_FN(non_max_suppression_overlaps) { auto in = inputShape->at(0); int outRank = shape::rank(in); - Nd4jLong *outputShape = nullptr; int maxOutputSize; if (block.width() > 2) @@ -76,7 +75,7 @@ namespace sd { if (boxSize < maxOutputSize) maxOutputSize = boxSize; - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp index b1b68c23d..b0a549c43 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp @@ -49,24 +49,25 @@ namespace sd { auto in = inputShape->at(0); int outRank = shape::rank(in) - 1; - Nd4jLong *outputShape = nullptr; + Nd4jLong const* outShape = nullptr; if (outRank > 1) { + Nd4jLong *outputShape = nullptr; ALLOCATE(outputShape, block.getWorkspace(), shape::shapeInfoLength(outRank), Nd4jLong); outputShape[0] = outRank; for (Nd4jLong e = 0; e < outRank; e++) outputShape[e + 1] = in[e + 1]; ShapeUtils::updateStridesAndType(outputShape, in, shape::order(in)); - outputShape = CONSTANT(outputShape); + outShape = CONSTANT(outputShape); } else if (outRank == 1) { - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); + outShape = 
ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); } else { //outputShape = shape::createScalarShapeInfo(); - outputShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in)); + outShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in)); } - return SHAPELIST(outputShape); + return SHAPELIST(outShape); } DECLARE_TYPES(nth_element) { getOpDescriptor() diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp index d64499ecf..6349b84fe 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp @@ -90,7 +90,6 @@ namespace sd { REQUIRE_TRUE(depth > 0, 0, "OneHot: depth must be positive value"); - Nd4jLong *newShape; int rank = shape::rank(inShape); if (axis < 0) @@ -101,7 +100,7 @@ namespace sd { shape.push_back(shape::shapeOf(inShape)[e]); shape.insert(shape.begin() + axis, depth); - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', rank + 1, shape.data()); + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', rank + 1, shape.data()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp index 7ab19668a..b348c4549 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp @@ -82,8 +82,8 @@ namespace sd { return helpers::segmentMaxFunctorBP(block.launchContext(), input, indices, gradOut, output); } DECLARE_SHAPE_FN(segment_max_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git 
a/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp index abb865d8e..1d8a5bb7f 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp @@ -82,14 +82,15 @@ namespace sd { return helpers::segmentMeanFunctorBP(block.launchContext(), input, indices, gradOut, output); } DECLARE_SHAPE_FN(segment_mean_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); +// return SHAPELIST(in, inIdx); } DECLARE_TYPES(segment_mean_bp) { getOpDescriptor() diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp index a245b000b..10bc1dd26 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp @@ -44,7 +44,7 @@ namespace sd { DECLARE_SHAPE_FN(segment_min) { auto idxVector = INPUT_VARIABLE(1); - Nd4jLong* in = inputShape->at(0); + auto in = inputShape->at(0); int outRank = shape::rank(in); Nd4jLong* outputShape = nullptr; int val = (*idxVector).e(idxVector->lengthOf() - 1); @@ -72,14 +72,15 @@ namespace sd { return helpers::segmentMinFunctorBP(block.launchContext(), input, indices, gradOut, output); } DECLARE_SHAPE_FN(segment_min_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); +// return SHAPELIST(in, inIdx); } DECLARE_TYPES(segment_min) { diff 
--git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp index 478eb9e23..4f83ac9b0 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp @@ -85,8 +85,8 @@ namespace sd { DECLARE_SHAPE_FN(segment_prod_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp index bb959fd3d..cb4734c5f 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp @@ -68,15 +68,15 @@ namespace sd { return helpers::segmentSumFunctorBP(block.launchContext(), INPUT_VARIABLE(0), INPUT_VARIABLE(1), INPUT_VARIABLE(2), OUTPUT_NULLIFIED(0)); } DECLARE_SHAPE_FN(segment_sum_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } DECLARE_TYPES(segment_sum) { diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp index 64b915c53..9005348a1 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp @@ -40,8 +40,8 @@ namespace sd { auto in = inputShape->at(0); auto source = INPUT_VARIABLE(0); // auto shapeList = SHAPELIST(); - Nd4jLong* valuesShape; - Nd4jLong* indicesShape; + const Nd4jLong* valuesShape; + const Nd4jLong* 
indicesShape; int uniqueCount = helpers::uniqueCount(block.launchContext(), source); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp index 8ca01540c..1909005a7 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp @@ -81,8 +81,8 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_max_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp index 7aa46295c..def3adb6a 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp @@ -83,15 +83,15 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_mean_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp index 76dd982f7..da31477eb 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp @@ -43,7 +43,7 @@ namespace sd { DECLARE_SHAPE_FN(unsorted_segment_min) { - Nd4jLong* in = inputShape->at(0); + auto in = inputShape->at(0); int outRank = 
shape::rank(in); Nd4jLong* outputShape = nullptr; Nd4jLong numOfClasses = block.width() == 3 ? INPUT_VARIABLE(2)->e(0) : INT_ARG(0); @@ -83,8 +83,8 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_min_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp index d2f491c55..905a04b36 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp @@ -96,15 +96,15 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_prod_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp index a8dbf8eaf..e208f4489 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp @@ -81,15 +81,15 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_sqrt_n_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } } diff --git 
a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp index 1afcab34f..325385a86 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp @@ -71,8 +71,8 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_sum_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp index 1441448c9..f0b2b587b 100644 --- a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp +++ b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp @@ -38,7 +38,7 @@ namespace sd { T f = T_ARG(0); - functions::random::RandomFunction::template execTransform>(block.getRNG(), z->getBuffer(), z->getShapeInfo(), &f); + functions::random::RandomFunction::template execTransform>(block.getRNG(), z->buffer(), z->shapeInfo(), &f); */ auto z = OUTPUT_VARIABLE(0); diff --git a/libnd4j/include/ops/declarable/generic/random/gamma.cpp b/libnd4j/include/ops/declarable/generic/random/gamma.cpp index d508e1929..e21458530 100644 --- a/libnd4j/include/ops/declarable/generic/random/gamma.cpp +++ b/libnd4j/include/ops/declarable/generic/random/gamma.cpp @@ -60,7 +60,9 @@ namespace sd { if (inputShape->size() > 2) { auto rest = inputShape->at(2); additionalShape = nullptr; REQUIRE_TRUE(ShapeUtils::areShapesBroadcastable(alphaShape, rest), 0, "random_gamma: alpha and beta shapes should be broadcastable."); - ShapeUtils::evalBroadcastShapeInfo(alphaShape, rest, true, additionalShape, block.workspace()); + const Nd4jLong* additionalShapeBroadcasted = nullptr; + ShapeUtils::evalBroadcastShapeInfo(alphaShape, rest, true, 
additionalShapeBroadcasted, block.workspace()); + additionalShape = additionalShapeBroadcasted; } auto lastDim = shape::sizeAt(alphaShape, 0); auto dtype = ArrayOptions::dataType(alphaShape); @@ -80,4 +82,4 @@ namespace sd { } } -#endif \ No newline at end of file +#endif diff --git a/libnd4j/include/ops/declarable/generic/random/normal.cpp b/libnd4j/include/ops/declarable/generic/random/normal.cpp index 8bfbd8db6..f81a06786 100644 --- a/libnd4j/include/ops/declarable/generic/random/normal.cpp +++ b/libnd4j/include/ops/declarable/generic/random/normal.cpp @@ -36,7 +36,7 @@ namespace sd { auto x = INPUT_VARIABLE(0); auto z = OUTPUT_VARIABLE(0); - functions::random::RandomFunction::template execTransform>(block.getRNG(), z->getBuffer(), z->getShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->getBuffer(), z->getShapeInfo(), block.getTArguments()->data()); + functions::random::RandomFunction::template execTransform>(block.getRNG(), z->buffer(), z->shapeInfo(), z->buffer(), z->shapeInfo(), z->buffer(), z->shapeInfo(), block.getTArguments()->data()); */ RandomLauncher::fillGaussian(block.launchContext(), rng, OUTPUT_VARIABLE(0), T_ARG(0), T_ARG(1)); diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp index 1682b9d72..d101a6a79 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp @@ -83,7 +83,7 @@ DECLARE_SHAPE_FN(reduce_stdev) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_STDEV OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" 
, inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(in), dimensions, in, keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(in), dimensions, in, keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } @@ -132,7 +132,7 @@ CUSTOM_OP_IMPL(reduce_stdev_bp, 2, 1, false, 0, 0) { auto mean = input->reduceAlongDimension(reduce::Mean, dimensions, true); - NDArray variance(mean.getShapeInfo(), true, block.launchContext()); // create empty array with shape matching shape of mean array + NDArray variance(mean.shapeInfo(), true, block.launchContext()); // create empty array with shape matching shape of mean array input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, variance, biasCorrected, dimensions); gradI->assign( (*input - mean) / (variance * NminusOne)); // automatic broadcasting happens here @@ -165,6 +165,7 @@ DECLARE_SHAPE_FN(reduce_stdev_bp) { COPY_SHAPE(in, gradIshapeInfo); return SHAPELIST(CONSTANT(gradIshapeInfo)); +// return SHAPELIST(in); } DECLARE_TYPES(reduce_stdev_bp) { diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp index 805db1883..556ad2a7c 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp @@ -70,7 +70,7 @@ namespace ops { axes = *block.getIArguments(); } - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), axes, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), axes, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp 
b/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp index 3d2dbe57e..bea1e7ecc 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp @@ -77,7 +77,7 @@ DECLARE_SHAPE_FN(reduce_max) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_MAX OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp index 254cfe021..d4b470b8e 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp @@ -77,7 +77,7 @@ DECLARE_SHAPE_FN(reduce_min) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_MIN OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" 
, inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp index 0c53a261b..22d2c6e1b 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp @@ -75,7 +75,7 @@ DECLARE_SHAPE_FN(reduce_sqnorm) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_SQNORM OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp b/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp index 90e2ff398..9a8dc00c2 100644 --- a/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp @@ -47,7 +47,7 @@ namespace sd { DECLARE_SHAPE_FN(reshapeas) { - return SHAPELIST(ShapeBuilders::copyShapeInfo(INPUT_VARIABLE(1)->getShapeInfo(), false, block.workspace())); + return SHAPELIST(ShapeBuilders::copyShapeInfo(INPUT_VARIABLE(1)->shapeInfo(), false, block.workspace())); } 
DECLARE_TYPES(reshapeas) { diff --git a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp index 812947422..0b71dae52 100644 --- a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp @@ -93,7 +93,7 @@ namespace sd { DECLARE_SHAPE_FN(squeeze) { auto shapeList = SHAPELIST(); - Nd4jLong* newShape; +// Nd4jLong* newShape; auto in = inputShape->at(0); auto rank = shape::rank(in); auto length = shape::length(in); @@ -148,7 +148,7 @@ namespace sd { return shapeList; } - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), order, shape); + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), order, shape); shapeList->push_back(newShape); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp index 747331ef0..88b06a631 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp @@ -415,7 +415,7 @@ namespace sd { ALLOCATE(subArrShapeInfo, block.getWorkspace(), shape::shapeInfoLength(x->rankOf()), Nd4jLong); Nd4jLong offset; - shape::calcSubArrShapeInfoAndOffset(indices.data(), x->getShapeInfo(), subArrShapeInfo, offset, true, true); + shape::calcSubArrShapeInfoAndOffset(indices.data(), x->shapeInfo(), subArrShapeInfo, offset, true, true); auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); NDArray::prepareSpecialUse({z}, {x}); @@ -502,7 +502,6 @@ namespace sd { ++e; } - Nd4jLong *newShape; std::vector input_shape; //(shape::rank(inShape)); auto inputLen = shape::length(inShape); std::vector shape; @@ -519,7 +518,7 @@ namespace sd { std::vector indices; bool result = _preprocess_strided_slice(&indices, &shape, input_shape, 
begin, end, strides, begin_mask, ellipsis_mask, end_mask, new_axis_mask, shrink_axis_mask, &is_identity, &is_simple_slice, &is_dim0); if (indices.size()) { - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); // if (inputLen > 1) { // newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', @@ -527,10 +526,10 @@ namespace sd { // } else { // newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); // } - } else - newShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(inShape)); + return SHAPELIST(newShape); + } - return SHAPELIST(newShape); + return SHAPELIST(ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(inShape))); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp index 0b171b36f..fb1fd2e87 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp @@ -132,7 +132,7 @@ DECLARE_SHAPE_FN(concat) { // first of all take into account possible presence of empty arrays // also if scalar is present -> use the shape of vector with length=1 instead - std::vector arrShapes; + ShapeList arrShapes; std::vector shapesToDelete; int index = 0; for(int i = 0; i < numOfInArrs; ++i) { @@ -151,7 +151,7 @@ DECLARE_SHAPE_FN(concat) { const int numOfNonEmptyArrs = arrShapes.size(); - const int rank = arrShapes[0][0]; + const int rank = shape::rank(arrShapes.at(0)); int axis = isAxisInLastArr ? 
INPUT_VARIABLE(block.width() - 1)->e(0) : INT_ARG(0); if(axis < 0){ @@ -162,33 +162,33 @@ DECLARE_SHAPE_FN(concat) { REQUIRE_TRUE(0 <= axis && axis < rank, 0, "CONCAT op: input axis must be in range [0, %i], but got %i instead!", rank-1, axis); for(int i = 1; i < numOfNonEmptyArrs; ++i) - REQUIRE_TRUE(arrShapes[i][0] == rank, 0, "CONCAT op: all input arrays must have the same rank !"); + REQUIRE_TRUE(shape::rank(arrShapes.at(i)) == rank, 0, "CONCAT op: all input arrays must have the same rank !"); for(int i = 1; i < numOfNonEmptyArrs; ++i) { for(int dim = 0; dim < rank; ++dim) if(dim != axis) - REQUIRE_TRUE(arrShapes[i][dim+1] == arrShapes[0][dim+1], 0, "CONCAT op: all input arrays must have the same dimensions (except those on input axis) !"); + REQUIRE_TRUE(arrShapes.at(i)[dim+1] == arrShapes.at(0)[dim+1], 0, "CONCAT op: all input arrays must have the same dimensions (except those on input axis) !"); } // ******** end of input validation ******** // Nd4jLong* outShapeInfo(nullptr); - COPY_SHAPE(arrShapes[0], outShapeInfo); + COPY_SHAPE(arrShapes.at(0), outShapeInfo); // case when we have only one input array if(numOfNonEmptyArrs == 1) { - ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes[0], shape::order(arrShapes[0])); + ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes.at(0), shape::order(arrShapes.at(0))); return SHAPELIST(CONSTANT(outShapeInfo)); } for(int i = 1; i < numOfNonEmptyArrs; ++i) - outShapeInfo[axis + 1] += arrShapes[i][axis + 1]; + outShapeInfo[axis + 1] += arrShapes.at(i)[axis + 1]; - ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes[0], shape::order(arrShapes[0])); + ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes.at(0), shape::order(arrShapes.at(0))); // delete dynamically allocated vectors shapes with length=1 - for(int index : shapesToDelete) - RELEASE(arrShapes[index], block.getWorkspace()); +// for(int index : shapesToDelete) +// RELEASE(arrShapes[index], block.getWorkspace()); auto result = 
ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo)); RELEASE(outShapeInfo, block.getWorkspace()); @@ -237,8 +237,8 @@ DECLARE_SHAPE_FN(concat) { // auto buffers = new Nd4jPointer[elements]; // auto shapes = new Nd4jPointer[elements]; - // buffers[0] = (Nd4jPointer) first->getBuffer(); - // shapes[0] = (Nd4jPointer) first->getShapeInfo(); + // buffers[0] = (Nd4jPointer) first->buffer(); + // shapes[0] = (Nd4jPointer) first->shapeInfo(); // if (_dimension < 0) // _dimension += first->rankOf(); @@ -256,8 +256,8 @@ DECLARE_SHAPE_FN(concat) { // if (array->isEmpty()) // continue; - // buffers[er] = reinterpret_cast(array->getBuffer()); - // shapes[er++] = reinterpret_cast(array->getShapeInfo()); + // buffers[er] = reinterpret_cast(array->buffer()); + // shapes[er++] = reinterpret_cast(array->shapeInfo()); // oldScalars &= array->rankOf() == 2 && array->isScalar(); @@ -274,7 +274,7 @@ DECLARE_SHAPE_FN(concat) { // _dimension = 1; // } - // sd::SpecialMethods::concatCpuGeneric(_dimension, elements, buffers, shapes, output->getBuffer(), output->getShapeInfo()); + // sd::SpecialMethods::concatCpuGeneric(_dimension, elements, buffers, shapes, output->buffer(), output->shapeInfo()); // STORE_RESULT(*output); diff --git a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp index a4b934853..143e57a80 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp @@ -79,23 +79,20 @@ DECLARE_SHAPE_FN(mirror_pad) { REQUIRE_TRUE( (paddings->e(i,0) <= (input->sizeAt(i) - includeBorder)) && (paddings->e(i,1) <= (input->sizeAt(i) - includeBorder)), 0, "MIRROR_PAD OP: wrong content of paddings array, its elements must be no grater then corresponding dimension of input array for symmetric mode (or dimension-1 for reflect mode) !"); } - Nd4jLong* outShapeInfo(nullptr); - if(rank == 1) { Nd4jLong 
len = input->lengthOf() + paddings->e(0) + paddings->e(1); - outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, input->dataType()); - } - else { - ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(rank), Nd4jLong); - outShapeInfo[0] = rank; - for(int i = 0; i < rank; ++i) - outShapeInfo[i+1] = input->sizeAt(i) + paddings->e(i,0) + paddings->e(i,1); - ShapeUtils::updateStridesAndType(outShapeInfo, input->shapeInfo(), input->ordering()); - - outShapeInfo = CONSTANT(outShapeInfo); + return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(len, input->dataType())); } - return SHAPELIST(outShapeInfo); + Nd4jLong* outShapeInfo(nullptr); + + ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(rank), Nd4jLong); + outShapeInfo[0] = rank; + for(int i = 0; i < rank; ++i) + outShapeInfo[i+1] = input->sizeAt(i) + paddings->e(i,0) + paddings->e(i,1); + ShapeUtils::updateStridesAndType(outShapeInfo, input->shapeInfo(), input->ordering()); + + return SHAPELIST(CONSTANT(outShapeInfo)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp b/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp index b6a2ba1e1..46572d88e 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp @@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(parallel_stack, -1, 1, false, 0, 0) { // check whether shapes of all input array are the same for (int i = 0; i < (int) block.width() - 1; ++i) - REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->getShapeInfo(), (INPUT_VARIABLE(i+1))->getShapeInfo()), 0, "PARALLEL_STACK op: the shapes of all input arrays must be the same !"); + REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->shapeInfo(), (INPUT_VARIABLE(i+1))->shapeInfo()), 0, "PARALLEL_STACK op: the shapes of all input arrays must be the same !"); std::vector inArrs(block.width()); for(int i = 0; i < block.width(); 
++i) diff --git a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp index dc4671ef7..96e7fe6b3 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp @@ -85,7 +85,7 @@ namespace sd { Nd4jLong offset; - shape::calcSubArrShapeInfoAndOffset(indices.data(), input->getShapeInfo(), subArrShapeInfo, offset, true); + shape::calcSubArrShapeInfoAndOffset(indices.data(), input->shapeInfo(), subArrShapeInfo, offset, true); auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); diff --git a/libnd4j/include/ops/declarable/generic/transforms/split.cpp b/libnd4j/include/ops/declarable/generic/transforms/split.cpp index 60a80378e..462f2c77e 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/split.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/split.cpp @@ -84,17 +84,14 @@ namespace ops { DECLARE_SHAPE_FN(split) { int num_splits = INT_ARG(0); - Nd4jLong *input = nullptr; - sd::DataType dataType; + auto input = inputShape->at(0); + sd::DataType dataType = ArrayOptions::dataType(input); // axis is 0 by default int axis = 0; int inputVar = 0; - if (inputShape->size() == 1) { - input = inputShape->at(0); - dataType = ArrayOptions::dataType(input); - } else { + if (inputShape->size() != 1) { auto shape0 = inputShape->at(0); auto shape1 = inputShape->at(1); diff --git a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp index a78442b03..65cd41a3a 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp @@ -41,7 +41,7 @@ CUSTOM_OP_IMPL(stack, -1, 1, false, 0, 0) { // input validation // check whether shapes of all input array are the same for (int i = 0; i < (int) block.width() - 1; ++i) - 
REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->getShapeInfo(), (INPUT_VARIABLE(i+1))->getShapeInfo()), 0, "STACK op: the shapes of all input arrays must be the same !"); + REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->shapeInfo(), (INPUT_VARIABLE(i+1))->shapeInfo()), 0, "STACK op: the shapes of all input arrays must be the same !"); REQUIRE_TRUE(dim <= input->rankOf(), 0, "STACK op: the input dimension parameter must be <= rank of input arrays shapes (rank=%i), but got %i instead !", input->shapeOf(), dim); diff --git a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp index 6041d1c41..4dc259bba 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp @@ -68,7 +68,7 @@ CUSTOM_OP_IMPL(tile, 1, 1, false, 0, -2) { DECLARE_SHAPE_FN(tile) { - Nd4jLong* inShape = inputShape->at(0); + auto inShape = inputShape->at(0); const int inRank = inShape[0]; std::vector reps; @@ -145,8 +145,8 @@ CUSTOM_OP_IMPL(tile_bp, 2, 1, false, 0, -2) { DECLARE_SHAPE_FN(tile_bp) { - Nd4jLong* inShape = inputShape->at(0); - Nd4jLong* gradOShape = inputShape->at(1); + auto inShape = inputShape->at(0); + auto gradOShape = inputShape->at(1); const int inRank = inShape[0]; std::vector reps; diff --git a/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp index bab205543..93f01ae1f 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp @@ -41,11 +41,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateMsg), 0, "ADA_DELTA UPDATER OP: input state Msg must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - 
ShapeUtils::shapeAsString(initStateMsg->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateMsg->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateMsdx), 0, "ADA_DELTA UPDATER OP: input state Msdx must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateMsdx->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateMsdx->shapeInfo()).c_str()); bool bParamsSupply = 5 == block.width() || 2 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp index a7a92b410..4cd5b0504 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp @@ -39,8 +39,8 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initState), 0, "ADA_GRAD UPDATER OP: input state must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initState->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initState->shapeInfo()).c_str()); bool bParamsSupply = 4 == block.width() || 2 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp index 4e34c24f6..9f4bb574b 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp @@ 
-42,11 +42,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateU), 0, "ADA_MAX UPDATER OP: input state V must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateU->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateU->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "ADA_MAX UPDATER OP: input state M must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp index a696d2388..96386c45b 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp @@ -42,11 +42,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateU), 0, "ADAM UPDATER OP: input state V must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateU->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateU->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "ADAM UPDATER OP: input state M must have the same shape as gradient," - " expected shape %s, but got 
%s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp index bc0f4beac..32084d970 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp @@ -44,14 +44,14 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateV), 0, "AMSGRAD UPDATER OP: input state Msg must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateV->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateV->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "AMSGRAD UPDATER OP: input state Msdx must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateH), 0, "AMSGRAD UPDATER OP: input state Msdx must have the same shape as gradient!," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateH->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", 
ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateH->shapeInfo()).c_str()); bool bParamsSupply = 8 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp index c6af0686b..4d5e4e12e 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp @@ -42,11 +42,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "NADAM UPDATER OP: input state M must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateV), 0, "NADAM UPDATER OP: input state V must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateV->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateV->shapeInfo()).c_str()); bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp index c77abd448..bcbefe36b 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp @@ -39,8 +39,8 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initState), 0, "NESTEROVS 
UPDATER OP: input state Msg must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initState->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initState->shapeInfo()).c_str()); bool bParamsSupply = 4 == block.width() || 2 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp index 1ca318e26..a611a4fbe 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp @@ -39,8 +39,8 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initState), 0, "RMS_PROB UPDATER OP: input state must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initState->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initState->shapeInfo()).c_str()); bool bParamsSupply = 5 == block.width() || 3 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp index 0103e8672..5518588e4 100644 --- a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp +++ b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp @@ -31,7 +31,7 @@ namespace sd { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - nd4j_printf(": Actuality: [HOST: %s; DEVICE: %s]; affinity: [%i]; Pointers: [HOST: %p; DEVICE: %p]; DataBuffer length: %lld\n", block.nodeId(), input->isActualOnHostSide() ? 
"true" : "false", input->isActualOnDeviceSide() ? "true" : "false", input->dataBuffer()->deviceId(), input->getBuffer(), input->getSpecialBuffer(), input->dataBuffer()->getLenInBytes()); + nd4j_printf(": Actuality: [HOST: %s; DEVICE: %s]; affinity: [%i]; Pointers: [HOST: %p; DEVICE: %p]; DataBuffer length: %lld\n", block.nodeId(), input->isActualOnHostSide() ? "true" : "false", input->isActualOnDeviceSide() ? "true" : "false", input->dataBuffer()->deviceId(), input->buffer(), input->specialBuffer(), input->dataBuffer()->getLenInBytes()); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/helpers/compression.h b/libnd4j/include/ops/declarable/helpers/compression.h index 10eecb09f..b9c70a91b 100644 --- a/libnd4j/include/ops/declarable/helpers/compression.h +++ b/libnd4j/include/ops/declarable/helpers/compression.h @@ -26,7 +26,7 @@ namespace sd { namespace ops { namespace helpers { - void decodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output); + void decodeBitmap(sd::LaunchContext* context, const NDArray* input, NDArray* output); Nd4jLong encodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output, float threshold); } } diff --git a/libnd4j/include/ops/declarable/helpers/convolutions.h b/libnd4j/include/ops/declarable/helpers/convolutions.h index f38692a35..eb41ae637 100644 --- a/libnd4j/include/ops/declarable/helpers/convolutions.h +++ b/libnd4j/include/ops/declarable/helpers/convolutions.h @@ -155,7 +155,7 @@ namespace sd { // evaluates sizes values and indexes using input and output arrays depending on data format static inline void getSizesAndIndexesConv2d(const bool isNCHW, const int wFormat, const NDArray& input, const NDArray& output, int& bS, int& iC, int& iH, int& iW, int& oC, int& oH, int& oW, int& indIOioC, int& indIiH, int& indWiC, int& indWoC, int& indWkH, int& indOoH) { - getSizesAndIndexesConv2d(isNCHW, wFormat, input.getShapeInfo(), output.getShapeInfo(), bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, 
indWiC, indWoC, indWkH, indOoH); + getSizesAndIndexesConv2d(isNCHW, wFormat, input.shapeInfo(), output.shapeInfo(), bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH); } static inline void getSizesAndIndexesConv2d(const bool isNCHW, const int wFormat, const Nd4jLong* inShapeInfo, const Nd4jLong* outShapeInfo, int& bS, int& iC, int& iH, int& iW, int& oC, int& oH, int& oW, int& indIOioC, int& indIiH, int& indWiC, int& indWoC, int& indWkH, int& indOoH) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp b/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp index 5ac61964c..97bdd5c89 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp @@ -28,8 +28,8 @@ namespace helpers { Nd4jLong barnes_row_count(const NDArray* rowP, const NDArray* colP, Nd4jLong N, NDArray& rowCounts) { int* pRowCounts = reinterpret_cast(rowCounts.buffer()); - int const* pRows = reinterpret_cast(rowP->getBuffer()); - int const* pCols = reinterpret_cast(colP->getBuffer()); + int const* pRows = reinterpret_cast(rowP->buffer()); + int const* pCols = reinterpret_cast(colP->buffer()); for (Nd4jLong n = 0; n < N; n++) { int begin = pRows[n];//->e(n); int end = pRows[n + 1];//rowP->e(n + 1); @@ -69,7 +69,7 @@ namespace helpers { //NDArray symValP = NDArrayFactory::create('c', {numElements}); //symRowP.insert(symRowP.begin(),0); //symRowP(1, {0}) = *rowCounts; - int const* pRows = reinterpret_cast(rowP->getBuffer()); + int const* pRows = reinterpret_cast(rowP->buffer()); int* symRowP = reinterpret_cast(outputRows->buffer()); symRowP[0] = 0; for (Nd4jLong n = 0; n < N; n++) @@ -79,8 +79,8 @@ namespace helpers { int* symColP = reinterpret_cast(outputCols->buffer()); // symRowP.p(n + 1, symRowP.e(n) + rowCounts.e(n)) // outputRows->printBuffer("SymRows are"); - int const* pCols = reinterpret_cast(colP->getBuffer()); - T const* pVals = 
reinterpret_cast(valP->getBuffer()); + int const* pCols = reinterpret_cast(colP->buffer()); + T const* pVals = reinterpret_cast(valP->buffer()); T* pOutput = reinterpret_cast(outputVals->buffer()); //std::vector rowCountsV = rowCounts->getBufferAsVector(); std::vector offset(N);// = NDArrayFactory::create('c', {N}); @@ -143,8 +143,8 @@ namespace helpers { template static void barnes_edge_forces_(const NDArray* rowP, NDArray const* colP, NDArray const* valP, int N, NDArray const* data, NDArray* output) { - T const* dataP = reinterpret_cast(data->getBuffer()); - T const* vals = reinterpret_cast(valP->getBuffer()); + T const* dataP = reinterpret_cast(data->buffer()); + T const* vals = reinterpret_cast(valP->buffer()); T* outputP = reinterpret_cast(output->buffer()); int colCount = data->columns(); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp b/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp index de56650c8..ccc4d676a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp @@ -64,9 +64,9 @@ namespace helpers { const int rank = input.rankOf(); int temp; - if(shape::isCommonVector(input.getShapeInfo(), temp)) { + if(shape::isCommonVector(input.shapeInfo(), temp)) { - BUILD_SINGLE_SELECTOR(input.dataType(), _softMaxDerivForVector, (context, input.getBuffer(), input.getShapeInfo(), output.buffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), _softMaxDerivForVector, (context, input.buffer(), input.shapeInfo(), output.buffer()), FLOAT_TYPES); } else { auto maxAlongDim = const_cast(input).reduceAlongDimension(reduce::Max, {dimension}, true); @@ -79,8 +79,8 @@ namespace helpers { /////////////////////////////////////////////////////////////////// template - void logSoftMaxForVector_(void *input, Nd4jLong *inShapeInfo, void *output, Nd4jLong *outShapeInfo) { - auto inBuff = reinterpret_cast(input); + void logSoftMaxForVector_(void const* input, 
Nd4jLong const* inShapeInfo, void *output, Nd4jLong const* outShapeInfo) { + auto inBuff = reinterpret_cast(input); auto outBuff = reinterpret_cast(output); T max = -DataTypeUtils::max(); @@ -126,21 +126,21 @@ namespace helpers { } /////////////////////////////////////////////////////////////////// - void logSoftMaxForVector(sd::LaunchContext * context, const NDArray& input, NDArray& output) { + void logSoftMaxForVector(sd::LaunchContext* context, const NDArray& input, NDArray& output) { if(!input.isVector() || !output.isVector()) throw std::runtime_error("ops::helpers::logSoftMaxForVector function input and output arrays must be vectors !"); auto xType = input.dataType(); - BUILD_SINGLE_SELECTOR(xType, logSoftMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, logSoftMaxForVector_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); } ////////////////////////////////////////////////////////////////////////// void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alpha, NDArray& output) { const Nd4jLong inputLen = input.lengthOf(); - const Nd4jLong* inputShapeInfo = input.getShapeInfo(); - const Nd4jLong* alphaShapeInfo = alpha.getShapeInfo(); + const Nd4jLong* inputShapeInfo = input.shapeInfo(); + const Nd4jLong* alphaShapeInfo = alpha.shapeInfo(); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { @@ -161,8 +161,8 @@ void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alp void preluBP(sd::LaunchContext * context, const NDArray& input, const NDArray& alpha, const NDArray& dLdO, NDArray& dLdI, NDArray& dLdA) { const Nd4jLong inputLen = input.lengthOf(); - const Nd4jLong* inputShapeInfo = input.getShapeInfo(); - const Nd4jLong* alphaShapeInfo = alpha.getShapeInfo(); + const Nd4jLong* inputShapeInfo = input.shapeInfo(); + const Nd4jLong* alphaShapeInfo = alpha.shapeInfo(); 
dLdA.assign(0.0f); @@ -219,7 +219,7 @@ void preluBP(sd::LaunchContext * context, const NDArray& input, const NDArray& a if(input.isVector()) { if(rank == 1 || input.sizeAt(dimension) != 1) { - BUILD_SINGLE_SELECTOR(input.dataType(), logSoftMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), logSoftMaxForVector_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); } else output = 0.; @@ -235,7 +235,7 @@ void preluBP(sd::LaunchContext * context, const NDArray& input, const NDArray& a } BUILD_SINGLE_TEMPLATE(template void thresholdReluDerivative_, (sd::LaunchContext * context, NDArray* input, double threshold, NDArray* dLdO, NDArray* output), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void logSoftMaxForVector_, (void *input, Nd4jLong *inShapeInfo, void *output, Nd4jLong *outShapeInfo), FLOAT_TYPES); + BUILD_SINGLE_TEMPLATE(template void logSoftMaxForVector_, (void const* input, Nd4jLong const* inShapeInfo, void *output, Nd4jLong const* outShapeInfo), FLOAT_TYPES); BUILD_SINGLE_TEMPLATE(template void _softMaxDerivForVector, (sd::LaunchContext * context, const void *input, const Nd4jLong *inShapeInfo, void *output), FLOAT_TYPES); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp index 68b8c6955..a03b4504f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp @@ -396,15 +396,15 @@ namespace sd { } */ - Nd4jLong* x_shapeInfo = input.getShapeInfo(); - Nd4jLong* z_shapeInfo = output.getShapeInfo(); - X* x = input.bufferAsT(); - X* z = output.bufferAsT(); - const Y* b = bias.bufferAsT(); + auto x_shapeInfo = input.shapeInfo(); + auto z_shapeInfo = output.shapeInfo(); + auto x = input.bufferAsT(); + auto z = output.bufferAsT(); + auto b = bias.bufferAsT(); const Nd4jLong rank = 
x_shapeInfo[0]; - const Nd4jLong* bases = &(x_shapeInfo[1]); - const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); - const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + auto bases = &(x_shapeInfo[1]); + auto x_strides = &(x_shapeInfo[rank + 1]); + auto z_strides = &(z_shapeInfo[rank + 1]); const bool inplaceOp = (x == z); const bool same_order = inplaceOp || (input.ordering() == output.ordering()); const bool channel_atTheEnd = !isNCHW; @@ -502,27 +502,27 @@ namespace sd { FUNC_1D func = [order, isContinuous, rank, x, b, bias_new, z, x_shapeInfo, z_shapeInfo, same_stride, same_order, yStrideC, rank_skip] (uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { const Nd4jLong rank = x_shapeInfo[0]; - const Nd4jLong* bases = &(x_shapeInfo[1]); - const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); - const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + auto bases = &(x_shapeInfo[1]); + auto x_strides = &(x_shapeInfo[rank + 1]); + auto z_strides = &(z_shapeInfo[rank + 1]); const bool inplaceOp = (x == z); if (order == 'c') { if (isContinuous) { - channel_atTheEnd_continous_C(x, bias_new, z, inplaceOp, start, stop, increment); + channel_atTheEnd_continous_C(const_cast(x), bias_new, z, inplaceOp, start, stop, increment); } // rank is in [2,5] else if (rank == 4) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } else if (rank == 5) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } else if (rank == 2) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, 
same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } else if (rank == 3) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } } else { @@ -530,36 +530,36 @@ namespace sd { if (isContinuous) { if (rank == 4) { if (rank_skip == rank - 2) { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } else { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } } else if (rank == 5) { if (rank_skip == rank - 2) { //skip==3 - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } else { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } } else if (rank == 3) { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } } else if (rank == 4) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, 
increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 5) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 2) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 3) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } } @@ -600,18 +600,18 @@ namespace sd { const bool inplaceOp = (x == z); if (order == 'c') { if (isContinuous) { - channel_NC_continous_numHW_C(rank, bases, x_strides, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_NC_continous_numHW_C(rank, bases, x_strides, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } // rank is in [3,5] else if (rank == 4) { - channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 5) { - channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, 
increment); } else if (rank == 3) { - channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } } else { @@ -620,13 +620,13 @@ namespace sd { //continous case is missing if (rank == 4) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 5) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 3) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp index 078ebda10..20d91ee8b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp @@ -59,8 +59,8 @@ static void adjustHue_(const NDArray *input, const NDArray* deltaScalarArr, NDAr } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp index c5c5cf9c6..6610b69ac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp @@ -58,8 +58,8 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp index daaf4f71a..ec8f040a9 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp @@ -72,9 +72,9 @@ void bgemm_(const std::vector& vA, const std::vector& vB, st } if (std::is_same::value) { - BlasHelper::getInstance()->dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->getBuffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->getBuffer(),(double **) buffersC.data(), tldC, vA.size(), tsize); + BlasHelper::getInstance()->dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->buffer(), (double **) buffersA.data(), tldA, 
(double **) buffersB.data(), tldB, (double *) betas->buffer(),(double **) buffersC.data(), tldC, vA.size(), tsize); } else if (std::is_same::value) { - BlasHelper::getInstance()->sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->getBuffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->getBuffer(), (float **) buffersC.data(), tldC, vA.size(), tsize); + BlasHelper::getInstance()->sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->buffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->buffer(), (float **) buffersC.data(), tldC, vA.size(), tsize); } // release temporary arrays diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp index 2293fe843..65c342d9c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp @@ -44,13 +44,13 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* const T* g = gamma == nullptr ? nullptr : gamma->bufferAsT(); const T* b = beta == nullptr ? 
nullptr : beta->bufferAsT(); - const bool xzSameOffset = shape::haveSameShapeAndStrides(input->getShapeInfo(), output->getShapeInfo()); + const bool xzSameOffset = shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo()); - bool paramSameOffset = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool paramSameOffset = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(paramSameOffset && gamma != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(paramSameOffset && beta != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), beta->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), beta->shapeInfo()); const Nd4jLong lenBig = input->lengthOf(); const Nd4jLong lenSmall = mean->lengthOf(); @@ -73,27 +73,27 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* if(!isOwner) continue; - const auto meanOffset = shape::getIndexOffset(j, mean->getShapeInfo()); - const auto varOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, variance->getShapeInfo()); + const auto meanOffset = shape::getIndexOffset(j, mean->shapeInfo()); + const auto varOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, variance->shapeInfo()); const auto meanVal = m[meanOffset]; auto sigmaInvGam = static_cast(1) / sd::math::nd4j_sqrt(v[varOffset] + epsilon); if(g != nullptr) { - const auto gammaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, gamma->getShapeInfo()); + const auto gammaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, gamma->shapeInfo()); sigmaInvGam *= g[gammaOffset]; } T betaVal = static_cast(0); if(b != nullptr) { - const auto betaOffset = paramSameOffset ? 
meanOffset : shape::getIndexOffset(j, beta->getShapeInfo()); + const auto betaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, beta->shapeInfo()); betaVal = b[betaOffset]; } // calculate offsets for input and output - shape::outerArrayOffsets(xOffsets, j, input->getShapeInfo(), mean->getShapeInfo(), auxBuff, dimsToExclude.data()); + shape::outerArrayOffsets(xOffsets, j, input->shapeInfo(), mean->shapeInfo(), auxBuff, dimsToExclude.data()); if(!xzSameOffset) - shape::outerArrayOffsets(zOffsets, j, output->getShapeInfo(), mean->getShapeInfo(), auxBuff, dimsToExclude.data()); + shape::outerArrayOffsets(zOffsets, j, output->shapeInfo(), mean->shapeInfo(), auxBuff, dimsToExclude.data()); PRAGMA_OMP_SIMD for (Nd4jLong i = 0; i < steps; ++i) @@ -129,13 +129,13 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray const uint minRank = mean->rankOf(); const uint numAxes = axes.size(); - const bool xzSameOffset = shape::haveSameShapeAndStrides(input->getShapeInfo(), output->getShapeInfo()); + const bool xzSameOffset = shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo()); - bool paramSameOffset = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool paramSameOffset = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(paramSameOffset && gamma != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(paramSameOffset && beta != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), beta->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), beta->shapeInfo()); auto func = PRAGMA_THREADS_FOR { @@ -149,10 +149,10 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray for (auto i = start; i < stop; i++) { - 
shape::index2coordsCPU(start, i, input->getShapeInfo(), xzCoords); + shape::index2coordsCPU(start, i, input->shapeInfo(), xzCoords); - const auto xOffset = shape::getOffset(input->getShapeInfo(), xzCoords); - const auto zOffset = xzSameOffset ? xOffset : shape::getOffset(output->getShapeInfo(), xzCoords); + const auto xOffset = shape::getOffset(input->shapeInfo(), xzCoords); + const auto zOffset = xzSameOffset ? xOffset : shape::getOffset(output->shapeInfo(), xzCoords); if(minRank == xRank) { for (uint j = 0; j < numAxes; ++j) @@ -161,20 +161,20 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray else // minRank = numAxes = 1 in this case minCoords[0] = xzCoords[axes[0]]; - const auto meanOffset = shape::getOffset(mean->getShapeInfo(), minCoords); - const auto varianceOffset = paramSameOffset ? meanOffset : shape::getOffset(variance->getShapeInfo(), minCoords); + const auto meanOffset = shape::getOffset(mean->shapeInfo(), minCoords); + const auto varianceOffset = paramSameOffset ? meanOffset : shape::getOffset(variance->shapeInfo(), minCoords); T sigmaInvGam = 1. / sd::math::nd4j_sqrt(v[varianceOffset] + epsilon); if(g != nullptr) { - const auto gammaOffset = paramSameOffset ? meanOffset : shape::getOffset(gamma->getShapeInfo(), minCoords); + const auto gammaOffset = paramSameOffset ? meanOffset : shape::getOffset(gamma->shapeInfo(), minCoords); sigmaInvGam *= g[gammaOffset]; } z[zOffset] = (x[xOffset] - m[meanOffset]) * sigmaInvGam; if(b != nullptr) { - const auto betaOffset = paramSameOffset ? meanOffset : shape::getOffset(beta->getShapeInfo(), minCoords); + const auto betaOffset = paramSameOffset ? 
meanOffset : shape::getOffset(beta->shapeInfo(), minCoords); z[zOffset] += b[betaOffset]; } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp b/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp index cf46df2db..42d4af529 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp @@ -31,8 +31,8 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output auto imBuff = output.bufferAsT(); auto colBuff = input.bufferAsT(); - auto imShapeBuffer = output.getShapeInfo(); - auto colShapeBuffer = input.getShapeInfo(); + auto imShapeBuffer = output.shapeInfo(); + auto colShapeBuffer = input.shapeInfo(); auto colShape = shape::shapeOf(colShapeBuffer); auto colStride = shape::stride(colShapeBuffer); auto imShape = shape::shapeOf(imShapeBuffer); @@ -60,7 +60,9 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output if (false) { auto func = PRAGMA_THREADS_FOR_2D { - T *col, *im; + T const* col; + T* im; + int imRow, imCol; for (auto b = start_x; b < stop_x; b += inc_x) { @@ -96,20 +98,20 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output for (auto b = start; b < stop; b++) { T *im0 = imBuff + b * imStride0; - T *col4 = colBuff + b * colStride0; + T const* col4 = colBuff + b * colStride0; for (int colH = 0; colH < oH; ++colH, col4 += colStride4) { - T *col5 = col4; + T const* col5 = col4; for (int colW = 0; colW < oW; ++colW, col5 += colStride5) { - T *col1 = col5; + T const* col1 = col5; T *im1 = im0; for (int c = 0; c < iC; ++c, col1 += colStride1, im1 += imStride1) { int imRow = (-pH + colH * sH); - T *col2 = col1; + T const* col2 = col1; T *im2 = im1 + imRow * imStride2; for (int kRow = 0; kRow < kH; ++kRow, col2 += colStride2, imRow += dH, im2 += dH * imStride2) { int imCol = -pW + colW * sW; - T *col3 = col2; + T const* col3 = col2; T *im3 = im2 + imCol * imStride3; for (int kCol = 0; kCol < kW; ++kCol, 
col3 += colStride3, imCol += dW, im3 += dW * imStride3) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp index 12961fe92..32dc3d7c7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp @@ -22,7 +22,7 @@ namespace sd { namespace helpers { template static void _compare_elem(NDArray *input, bool isStrictlyIncreasing, bool& output) { - auto length = shape::length(input->getShapeInfo()); + auto length = shape::length(input->shapeInfo()); int elementsPerThread = length / ELEMENT_THRESHOLD; int num_threads = sd::math::nd4j_max(1, elementsPerThread); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp index eac25e772..0911b0619 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp @@ -24,7 +24,7 @@ namespace sd { namespace ops { namespace helpers { - void decodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output) { + void decodeBitmap(sd::LaunchContext* context, const NDArray* input, NDArray* output) { NativeOpExecutioner::decodeBitmap(input->buffer(), output->lengthOf(), output->buffer(), output->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp index b1f8f3b42..bac3812d1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp @@ -55,7 +55,7 @@ namespace sd { } void thresholdDecode(const NDArray &encoded, NDArray &updates) { - BUILD_SINGLE_SELECTOR(updates.dataType(), sd::TypeCast::convertFromThreshold, (nullptr, encoded.getBuffer(), updates.lengthOf(), 
updates.buffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(updates.dataType(), sd::TypeCast::convertFromThreshold, (nullptr, encoded.buffer(), updates.lengthOf(), updates.buffer()), FLOAT_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp index c9cae504a..b12064cac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp @@ -61,7 +61,7 @@ static void col2vol_(const NDArray& columns, NDArray& volume, const int sD, cons T* colBuff = const_cast(columns).bufferAsT(); - if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.getShapeInfo()) && shape::strideDescendingCAscendingF(columns.getShapeInfo())) { + if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.shapeInfo()) && shape::strideDescendingCAscendingF(columns.shapeInfo())) { auto func = PRAGMA_THREADS_FOR { T* col, *vol; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp index 552dceb6a..4c8b5bad1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp @@ -59,7 +59,7 @@ static void vol2col_(const NDArray& volume, NDArray& columns, const int sD, cons T* volBuff = const_cast(volume).bufferAsT(); - if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.getShapeInfo()) && shape::strideDescendingCAscendingF(columns.getShapeInfo())) { + if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.shapeInfo()) && shape::strideDescendingCAscendingF(columns.shapeInfo())) { auto func = PRAGMA_THREADS_FOR_3D { T *col, *vol; diff --git 
a/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp b/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp index 598b3dc30..27b73d001 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp @@ -27,7 +27,7 @@ namespace helpers { template static void __depthToSpace(const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - T *input_ptr = reinterpret_cast(input.getBuffer()); + T const*input_ptr = reinterpret_cast(input.buffer()); T *output_ptr = reinterpret_cast(output->buffer()); const int batch_size = input.sizeAt(0); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp index fbf071e28..1688dcbc4 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp @@ -38,9 +38,9 @@ static void dilation2d_(NDArray *input, NDArray *weights, NDArray *output, const const X* y = weights->bufferAsT(); Z* z = output->bufferAsT(); - const Nd4jLong* xShapeInfo = input->getShapeInfo(); - const Nd4jLong* yShapeInfo = weights->getShapeInfo(); - const Nd4jLong* zShapeInfo = output->getShapeInfo(); + const Nd4jLong* xShapeInfo = input->shapeInfo(); + const Nd4jLong* yShapeInfo = weights->shapeInfo(); + const Nd4jLong* zShapeInfo = output->shapeInfo(); const uint bS = input->sizeAt(0); const uint iH = input->sizeAt(1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp index fb715a5e5..1deb12752 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp @@ -78,20 +78,19 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in const Nd4jLong numOfSubArrs = indices->lengthOf(); - auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimsIn); - auto 
outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimsOut); + auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsIn); + auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsOut); - Nd4jLong* inTadShapeInfo = inTadPack.primaryShapeInfo(); - Nd4jLong* outTadShapeInfo = outTadPack.primaryShapeInfo(); + auto inTadShapeInfo = inTadPack.primaryShapeInfo(); + auto outTadShapeInfo = outTadPack.primaryShapeInfo(); if (shape::order(inTadShapeInfo) == shape::order(outTadShapeInfo) && shape::order(inTadShapeInfo) == 'c' && input->dataType() == output->dataType() && shape::elementWiseStride(inTadShapeInfo) == 1 && shape::elementWiseStride(outTadShapeInfo) == 1) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); - void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); + auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); memcpy(outBuff, inBuff, shape::length(inTadShapeInfo) * input->sizeOfT()); } @@ -102,8 +101,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); - void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); + auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign, inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, @@ -130,19 +129,18 @@ void gather(sd::LaunchContext * context, const NDArray* input, 
const NDArray* in std::vector dims = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis}); - auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dims); - auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dims); + auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dims); + auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dims); - Nd4jLong* inTadShapeInfo = inTadPack.primaryShapeInfo(); - Nd4jLong* outTadShapeInfo = outTadPack.primaryShapeInfo(); + auto inTadShapeInfo = inTadPack.primaryShapeInfo(); + auto outTadShapeInfo = outTadPack.primaryShapeInfo(); if (shape::order(inTadShapeInfo) == shape::order(outTadShapeInfo) && shape::order(inTadShapeInfo) == 'c' && input->dataType() == output->dataType() && shape::elementWiseStride(inTadShapeInfo) == 1 && shape::elementWiseStride(outTadShapeInfo) == 1) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); std::memcpy(outBuff, inBuff, shape::length(inTadShapeInfo) * input->sizeOfT()); @@ -156,9 +154,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); - void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); + auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign, inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, 
nullptr/*input specialShapeInfo*/, diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp index f7cb1cf59..db62c4b4f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp @@ -34,9 +34,9 @@ namespace helpers { template static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { - const X* x = reinterpret_cast(input.getBuffer()); - const Y* y = reinterpret_cast(indices.getBuffer()); - X* z = reinterpret_cast(output.getBuffer()); + const X* x = reinterpret_cast(input.buffer()); + const Y* y = reinterpret_cast(indices.buffer()); + X* z = reinterpret_cast(output.buffer()); const int xRank = input.rankOf(); const int yRank = indices.rankOf(); @@ -56,13 +56,13 @@ static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); temp = zCoords[yRank - 1]; zCoords[yRank - 1] = 0; - const auto yOffset = shape::getOffset(indices.getShapeInfo(), zCoords); + const auto yOffset = shape::getOffset(indices.shapeInfo(), zCoords); zCoords[yRank - 1] = temp; if(bEqual) @@ -75,7 +75,7 @@ static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { for (uint j = 0; j < yLastDim; ++j) xCoords[j] = y[yOffset + j * indices.stridesOf()[yRank - 1]]; // last stride - const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoords); + const auto xOffset = shape::getOffset(input.shapeInfo(), xCoords); z[zOffset] = x[xOffset]; } @@ -116,9 +116,9 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con output->assign(scalarNDArray); } else { 
auto dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis}); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); - auto tadArr = NDArray(reinterpret_cast(reinterpret_cast(input->getBuffer()) + tadPack.primaryOffsets()[indices->e(0)]), tadPack.primaryShapeInfo(), output->getContext()); + auto tadArr = NDArray(reinterpret_cast(reinterpret_cast(input->buffer()) + tadPack.primaryOffsets()[indices->e(0)]), tadPack.primaryShapeInfo(), output->getContext()); output->assign(&tadArr); } } @@ -135,7 +135,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con std::vector dimsOut(indices->rankOf()); std::iota(dimsOut.begin(), dimsOut.end(), axis); // fill with axis, axis+1, ... indices->rankOf()-1 - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->getShapeInfo(), dimsOut); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->shapeInfo(), dimsOut); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { @@ -159,7 +159,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con output->assign((*input)(intArgs[1], {axis})); } else { // vector case - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->getShapeInfo(), {axis}); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->shapeInfo(), {axis}); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp b/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp index 9fc6ddefb..cb815110d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp @@ -24,8 +24,8 @@ namespace sd { namespace ops { namespace helpers { template - static void histogram_(void *xBuffer, Nd4jLong 
*xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, double min_val, double max_val) { - auto dx = reinterpret_cast(xBuffer); + static void histogram_(void const* xBuffer, Nd4jLong const* xShapeInfo, void *zBuffer, Nd4jLong const* zShapeInfo, Nd4jLong numBins, double min_val, double max_val) { + auto dx = reinterpret_cast(xBuffer); auto result = reinterpret_cast(zBuffer); int length = shape::length(xShapeInfo); @@ -63,7 +63,7 @@ namespace sd { double min_val = input.reduceNumber(reduce::SameOps::Min).e(0); double max_val = input.reduceNumber(reduce::SameOps::Max).e(0); - BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (input.buffer(), input.shapeInfo(), output.getBuffer(), output.getShapeInfo(), numBins, min_val, max_val), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo(), numBins, min_val, max_val), LIBND4J_TYPES, INDEXING_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp b/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp index 2129b4bee..2434fddcc 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp @@ -32,10 +32,10 @@ static void im2col_(sd::LaunchContext & context, const NDArray& input, NDArray& // input [bS, iC, iH, iW] is convoluted to output [bS, iC, kH, kW, oH, oW] - auto imBuff = static_cast(input.getBuffer()); - auto colBuff = static_cast(output.getBuffer()); - auto imShapeBuffer = input.getShapeInfo(); - auto colShapeBuffer = output.getShapeInfo(); + auto imBuff = static_cast(input.buffer()); + auto colBuff = static_cast(output.buffer()); + auto imShapeBuffer = input.shapeInfo(); + auto colShapeBuffer = output.shapeInfo(); auto colShape = shape::shapeOf(colShapeBuffer); auto colStride = shape::stride(colShapeBuffer); auto imShape = shape::shapeOf(imShapeBuffer); @@ -95,7 +95,8 @@ 
static void im2col_(sd::LaunchContext & context, const NDArray& input, NDArray& else { auto func = PRAGMA_THREADS_FOR_2D { - T *col, *im; + T *col; + T const* im; int imRow, imCol; for (auto b = start_x; b < stop_x; b += inc_x) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp index 682677ef3..2183b7d5a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp @@ -53,9 +53,9 @@ static void rgbToGrs_(const NDArray& input, NDArray& output, const int dimC) { int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), coords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), coords); - const auto xOffset0 = shape::getOffset(input.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, output.shapeInfo(), coords); + const auto zOffset = shape::getOffset(output.shapeInfo(), coords); + const auto xOffset0 = shape::getOffset(input.shapeInfo(), coords); const auto xOffset1 = xOffset0 + input.strideAt(dimC); const auto xOffset2 = xOffset1 + input.strideAt(dimC); z[zOffset] = 0.2989f*x[xOffset0] + 0.5870f*x[xOffset1] + 0.1140f*x[xOffset2]; @@ -91,8 +91,8 @@ FORCEINLINE static void rgbToFromYuv_(const NDArray& input, NDArray& output, con return; } - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input.stridesOf()[dimC]; @@ -149,8 +149,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, 
samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; @@ -199,8 +199,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp index 5a4bb28cc..687153f99 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp @@ -40,7 +40,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector output->p(i, 1); } else { - int eleStride = shape::elementWiseStride(input->getShapeInfo()); + int eleStride = shape::elementWiseStride(input->shapeInfo()); if (eleStride == 1) { int maxIdx = 0; auto currMax = input->e(0); @@ -125,8 +125,8 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector //moving all dimensions (in sorted order) //to the back. 
//permuted version of the input shape info for setting up the tad problem - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), const_cast(dimensions.data()), dimensionsLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), const_cast(dimensions.data()), dimensionsLength); + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), const_cast(dimensions.data()), dimensionsLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), const_cast(dimensions.data()), dimensionsLength); auto tadShapeShapeInfo = tadPack.primaryShapeInfo(); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp index 31235d737..8dc31d8c0 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp @@ -35,13 +35,13 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu const int rank = input->rankOf(); - TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {rank - 1}); + TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {rank - 1}); TadPack outTadPack; - if(shape::haveSameShapeAndStrides(input->getShapeInfo(), output->getShapeInfo())) + if(shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo())) outTadPack = inTadPack; else - outTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {rank - 1}); + outTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {rank - 1}); const Nd4jLong numOfTads = inTadPack.numberOfTads(); const Nd4jLong tadLen = input->sizeAt(-1); @@ -52,8 +52,8 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu const Nd4jLong inTadEws = 
shape::elementWiseStride(inTadPack.primaryShapeInfo()); const Nd4jLong outTadEws = shape::elementWiseStride(outTadPack.primaryShapeInfo()); - const T* inBuff = reinterpret_cast(input->getBuffer()); - T* outBuff = reinterpret_cast(output->getBuffer()); + const T* inBuff = reinterpret_cast(input->buffer()); + T* outBuff = reinterpret_cast(output->buffer()); const T tbias = static_cast(bias); const T tbeta = static_cast(beta); @@ -151,13 +151,13 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c const int rank = input.rankOf(); - TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), {rank - 1}); + TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1}); TadPack gradITadPack; - if(shape::haveSameShapeAndStrides(input.getShapeInfo(), gradI.getShapeInfo())) + if(shape::haveSameShapeAndStrides(input.shapeInfo(), gradI.shapeInfo())) gradITadPack = inTadPack; else - gradITadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradI.getShapeInfo(), {rank - 1}); + gradITadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1}); const Nd4jLong numOfTads = inTadPack.numberOfTads(); const Nd4jLong tadLen = input.sizeAt(-1); @@ -168,8 +168,8 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c const Nd4jLong inTadEws = shape::elementWiseStride(inTadPack.primaryShapeInfo()); const Nd4jLong gradITadEws = shape::elementWiseStride(gradITadPack.primaryShapeInfo()); - const X* inBuff = reinterpret_cast(input.getBuffer()); - Y* gradIBuff = reinterpret_cast(gradI.getBuffer()); + const X* inBuff = reinterpret_cast(input.buffer()); + Y* gradIBuff = reinterpret_cast(gradI.buffer()); const Y tbias = static_cast(bias); const Y tbeta = static_cast(beta); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp index 
554486bbf..675fb2794 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp @@ -52,7 +52,7 @@ namespace helpers { if (fast) { // Cholesky decomposition approach // Equation for solve A^T * Ax = A^T * b, so // 1. Computing A2: - auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->getShapeInfo(), leftInput->getShapeInfo(), true, false); + auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->shapeInfo(), leftInput->shapeInfo(), true, false); //tAtShape[tAtShape.size() - 2] = output->sizeAt(-2); NDArray leftOutput('c', tAtShape, output->dataType(), context); MmulHelper::matmul(leftInput, leftInput, &leftOutput, true, false); // Computing A2 = A^T * A diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index 8938a98f9..0f435cfdb 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -40,7 +40,7 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template void swapRows_, (NDArray* matrix, int theFirst, int theSecond), FLOAT_TYPES); template - static void swapRows(T* matrixBuf, Nd4jLong* matrixShape, Nd4jLong theFirst, Nd4jLong theSecond) { + static void swapRows(T* matrixBuf, Nd4jLong const* matrixShape, Nd4jLong theFirst, Nd4jLong theSecond) { if (theFirst != theSecond) { auto n = shape::sizeAt(matrixShape, -1); @@ -208,7 +208,7 @@ namespace helpers { * lu decomposition with naive algorithm with partial pivoting * */ template - static I argmaxCol(I column, T* compoundBuffer, Nd4jLong* compoundShape) { + static I argmaxCol(I column, T* compoundBuffer, Nd4jLong const* compoundShape) { auto rowNum = shape::sizeAt(compoundShape, 0); Nd4jLong xInitial[] = {column, column}; auto xInitialIndex = shape::getOffset(compoundShape, xInitial, 0); @@ -230,7 +230,7 @@ namespace helpers { } template - void processColumns(int currentRow, int rowNum, T* compoundBuf, Nd4jLong* 
compoundShape) { + void processColumns(int currentRow, int rowNum, T* compoundBuf, Nd4jLong const* compoundShape) { Nd4jLong xDiag[] = {currentRow, currentRow}; auto diagIndex = shape::getOffset(compoundShape, xDiag, 0); auto loop = PRAGMA_THREADS_FOR { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp b/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp index 60df150a9..443048c56 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp @@ -39,9 +39,9 @@ void matrixSetDiag_(const NDArray& input, const NDArray& diagonal, NDArray& outp const T* y = diagonal.bufferAsT(); T* z = output.bufferAsT(); - const Nd4jLong* xShapeInfo = input.getShapeInfo(); - const Nd4jLong* yShapeInfo = diagonal.getShapeInfo(); - const Nd4jLong* zShapeInfo = output.getShapeInfo(); + const Nd4jLong* xShapeInfo = input.shapeInfo(); + const Nd4jLong* yShapeInfo = diagonal.shapeInfo(); + const Nd4jLong* zShapeInfo = output.shapeInfo(); const bool areSameOffsets = shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo); // shapes are definitely the same, but strides might not diff --git a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp index 74007635f..7874d6d67 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp @@ -131,10 +131,10 @@ static void mergeMaxBp_(const std::vector& inArrs, std::vectorgetShapeInfo(); + auto gradShape = inArrs[numArgs]->shapeInfo(); std::vector vbSameShaepeAndStrides(numArgs); for (int i = 0; i < numArgs; ++i) { - vbSameShaepeAndStrides[i] = shape::haveSameShapeAndStrides(gradShape, inArrs[i]->getShapeInfo()); + vbSameShaepeAndStrides[i] = shape::haveSameShapeAndStrides(gradShape, inArrs[i]->shapeInfo()); } auto func = PRAGMA_THREADS_FOR{ @@ -151,7 +151,7 @@ static void mergeMaxBp_(const std::vector& inArrs, 
std::vectorgetShapeInfo(), coords); + const auto xOffset = vbSameShaepeAndStrides[i] ? gradOffset : shape::getOffset(inArrs[i]->shapeInfo(), coords); const T* v = inArrs[i]->bufferAsT(); if (v[xOffset] > max) { max = v[xOffset]; @@ -159,7 +159,7 @@ static void mergeMaxBp_(const std::vector& inArrs, std::vectorgetShapeInfo(), coords); + const auto zOffset = vbSameShaepeAndStrides[nMaxIndex] ? gradOffset : shape::getOffset(outArrs[nMaxIndex]->shapeInfo(), coords); T* z = outArrs[nMaxIndex]->bufferAsT(); z[zOffset] = gradient[gradOffset]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp index 2730d9e88..53565f3c1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp @@ -34,7 +34,7 @@ namespace helpers { NDArray sortedVals(*input); if (input->isVector()) { //std::vector data(input->lengthOf()); - //memcpy(&data[0], input->getBuffer(), sizeof(T) * data.size()); + //memcpy(&data[0], input->buffer(), sizeof(T) * data.size()); //size_t l = 0; //for (size_t l = 0; l < data.size(); ++l) // data[l] = input->e(l); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp index d3f7add49..2aa14585b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp @@ -27,9 +27,9 @@ namespace sd { namespace ops { namespace helpers { template - static void onehot_(void *voutput, Nd4jLong *zShapeInfo, void *vindices, Nd4jLong *iShapeInfo, int axis, double on, double off) { + static void onehot_(void *voutput, Nd4jLong const* zShapeInfo, void const* vindices, Nd4jLong const* iShapeInfo, int axis, double on, double off) { auto output = reinterpret_cast(voutput); - auto indices = reinterpret_cast(vindices); + auto indices = reinterpret_cast(vindices); auto tadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(zShapeInfo, {axis}); @@ -96,7 +96,7 @@ namespace sd { auto zType = output->dataType(); auto iType = indices->dataType(); - BUILD_DOUBLE_SELECTOR(zType, iType, onehot_, (output->buffer(), output->shapeInfo(), indices->getBuffer(), indices->getShapeInfo(), axis, on, off), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(zType, iType, onehot_, (output->buffer(), output->shapeInfo(), indices->buffer(), indices->shapeInfo(), axis, on, off), LIBND4J_TYPES, LIBND4J_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp b/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp index b303d95ae..a0efd44c1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp @@ -52,8 +52,8 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * sizeof(int)); @@ -75,7 +75,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray } if (within) - z[zOffset] = x[shape::getOffset(input.getShapeInfo(), xCoords)]; + z[zOffset] = x[shape::getOffset(input.shapeInfo(), xCoords)]; else z[zOffset] = padVal; } @@ -94,8 +94,8 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * sizeof(int)); @@ 
-112,7 +112,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray xCoords[j] = 2 * xShape[j] - xCoords[j] - shift2; // means fill from right } - const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoords); + const auto xOffset = shape::getOffset(input.shapeInfo(), xCoords); z[zOffset] = x[xOffset]; } }; @@ -148,7 +148,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray // Nd4jLong startL = mode == 1 ? 1 : 0; // REFLECT or SYMMETRIC // Nd4jLong startR = mode == 1 ? inDimSize-2 : inDimSize-1; // REFLECT or SYMMETRIC -// Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude); +// Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.shapeInfo(), dimsToExclude); // NDArray outSubArr0 = output(outIdx[0], true); @@ -209,7 +209,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray // startL = mode == 1 ? numLeft + 1 : numLeft; // REFLECT or SYMMETRIC // startR = mode == 1 ? 
numLeft + inDimSize - 2 : numLeft + inDimSize-1; // REFLECT or SYMMETRIC -// numOfSubArrs = ShapeUtils::getNumOfSubArrs(output.getShapeInfo(), dimsToExclude); +// numOfSubArrs = ShapeUtils::getNumOfSubArrs(output.shapeInfo(), dimsToExclude); // PRAGMA_OMP_PARALLEL_FOR_ARGS(firstprivate(outIdxOuter, outIdxInner)) // for(Nd4jLong j = 0; j < numOfSubArrs; ++j) { @@ -294,7 +294,7 @@ static void mirrorPad_(const NDArray& input, const NDArray& paddings, NDArray& o for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), outIdx); + shape::index2coordsCPU(start, i, output.shapeInfo(), outIdx); for (int j = 0; j < rank; ++j) { const Nd4jLong inLen = input.sizeAt(j); @@ -312,9 +312,9 @@ static void mirrorPad_(const NDArray& input, const NDArray& paddings, NDArray& o inIdx[j] = len - outIdx[j]; } - auto outOffset = shape::getOffset(output.getShapeInfo(), outIdx); - auto inOffset = shape::getOffset(input.getShapeInfo(), inIdx); - reinterpret_cast(output.buffer())[outOffset] = reinterpret_cast(input.getBuffer())[inOffset]; + auto outOffset = shape::getOffset(output.shapeInfo(), outIdx); + auto inOffset = shape::getOffset(input.shapeInfo(), inIdx); + reinterpret_cast(output.buffer())[outOffset] = reinterpret_cast(input.buffer())[inOffset]; } }; @@ -340,13 +340,13 @@ static void recursiveLoopForPad_(const int mode, NDArray& input, const NDArray& // then we use this array for tads building, every time while recursion the number of built tads becomes bigger dimensions.erase(dimensions.begin()); // build tad basing on output array, also create auxiliary arrays pointing on required output array ranges - shape::TAD tadOut(output.getShapeInfo(), dimensions.data(), dimensions.size()); + shape::TAD tadOut(output.shapeInfo(), dimensions.data(), dimensions.size()); tadOut.createTadOnlyShapeInfo(); tadOut.createOffsets(); auto subArrOut = NDArray(output.getBuffer(), tadOut.tadOnlyShapeInfo, output.getContext()); auto subArr = 
NDArray(output.getBuffer(), tadOut.tadOnlyShapeInfo, output.getContext()); // build tad basing on input array, also create auxiliary array pointing on required input array range - shape::TAD tadIn(input.getShapeInfo(), dimensions.data(), dimensions.size()); + shape::TAD tadIn(input.shapeInfo(), dimensions.data(), dimensions.size()); tadIn.createTadOnlyShapeInfo(); tadIn.createOffsets(); auto subArrIn = NDArray(input.getBuffer(), tadIn.tadOnlyShapeInfo, output.getContext()); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp b/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp index 3ffa4dd82..dea46cd69 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp @@ -69,7 +69,7 @@ static void _percentile(const NDArray& input, NDArray& output, std::vector& // FIXME: parallelism ! for(int i=0; i(flattenedArr.getBuffer()); + auto buff = reinterpret_cast(flattenedArr.buffer()); flattenedArr.assign(listOfSubArrs.at(i)); std::sort(buff, buff + len); output.p(i, flattenedArr.e(position)); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp b/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp index 5307f841e..1afe03556 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp @@ -27,7 +27,7 @@ namespace sd { namespace ops { namespace helpers { template - static void prefix_(scalar::Ops op, const void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, bool exclusive, bool reverse) { + static void prefix_(scalar::Ops op, const void* vx, Nd4jLong const* xShapeInfo, void* vz, Nd4jLong const* zShapeInfo, bool exclusive, bool reverse) { const auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto length = shape::length(xShapeInfo); @@ -113,7 +113,7 @@ namespace sd { template static void prefix_(scalar::Ops op, const NDArray* x, NDArray* z, bool exclusive, bool reverse) { - 
prefix_(op, x->getBuffer(), x->getShapeInfo(), z->buffer(), z->shapeInfo(), exclusive, reverse); + prefix_(op, x->buffer(), x->shapeInfo(), z->buffer(), z->shapeInfo(), exclusive, reverse); }; void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArray* z, bool exclusive, bool reverse) { @@ -124,7 +124,7 @@ namespace sd { BUILD_SINGLE_SELECTOR(x->dataType(), prefix_, (op, x, z, dims, exclusive, reverse), LIBND4J_TYPES); } - BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, bool exclusive, bool reverse), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const void* vx, Nd4jLong const* xShapeInfo, void* vz, Nd4jLong const* zShapeInfo, bool exclusive, bool reverse), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const NDArray* x, NDArray* z, const std::vector& dims, bool exclusive, bool reverse), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const NDArray* x, NDArray* z, bool exclusive, bool reverse), LIBND4J_TYPES); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp index b38101feb..1e96211b3 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp @@ -34,11 +34,13 @@ namespace helpers { template void fillRandomGamma_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output) { - Nd4jLong* broadcasted = nullptr; - if (beta != nullptr) - ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcasted, context->getWorkspace()); - else - broadcasted = alpha->shapeInfo(); + auto broadcasted = alpha->shapeInfo(); + if (beta != nullptr) { + const Nd4jLong* broadcastedShape = nullptr; + ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcastedShape, context->getWorkspace()); + 
broadcasted = broadcastedShape; + } + auto step = shape::length(broadcasted); auto shift = output->lengthOf() / step; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp index 7323c3937..2e336da23 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp @@ -42,7 +42,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& if(!isInplace) output.assign(input); } - else if (input.isVector() || shape::isLikeVector(input.getShapeInfo(), temp)) { + else if (input.isVector() || shape::isLikeVector(input.shapeInfo(), temp)) { // apply Fisher-Yates shuffle if(isInplace) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp b/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp index 365465f64..34be299b7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp @@ -30,12 +30,12 @@ namespace helpers { template static int _randomCropFunctor(graph::Context& context, NDArray* input, NDArray* shape, NDArray* output, int seed) { graph::RandomGenerator rngX(context.getRng()); - //functions::random::RandomFunction::template execTransform>(rng, output->getBuffer(), output->getShapeInfo(), std::vector({T(0.), shape->e(last)}).data()); + //functions::random::RandomFunction::template execTransform>(rng, output->buffer(), output->shapeInfo(), std::vector({T(0.), shape->e(last)}).data()); //NativeOpExecutioner::execRandom(random::UniformDistribution, rng, output->buffer(), output->shapeInfo(), std::vector({T(0.), shape->e(last)}).data()); Nd4jLong last = shape->lengthOf() - 1; rngX.setSeed(seed); - //functions::random::RandomFunction::template execTransform>(rng, output->getBuffer(), output->getShapeInfo(), std::vector({T(0.), shape->getScalar(last)}).data()); + 
//functions::random::RandomFunction::template execTransform>(rng, output->buffer(), output->shapeInfo(), std::vector({T(0.), shape->getScalar(last)}).data()); for (Nd4jLong e = 0; e < output->lengthOf(); ++e) { output->p(e, rngX.relativeT(e, 0, shape->e(last))); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/range.cpp b/libnd4j/include/ops/declarable/helpers/cpu/range.cpp index e4349ac8a..eb2cbd760 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/range.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/range.cpp @@ -34,7 +34,7 @@ static void _range(const NDArray& start, const NDArray& delta, NDArray& outVecto const Nd4jLong len = outVector.lengthOf(); - auto buff = reinterpret_cast(outVector.getBuffer()); + auto buff = reinterpret_cast(outVector.buffer()); auto s = start.e(0); auto d = delta.e(0); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp b/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp index 3d17fb62a..95417dade 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp @@ -38,8 +38,8 @@ inline void swap(T* arr, Nd4jLong from, Nd4jLong to) { // this legacy op is written by raver119@gmail.com template -static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *inShapeBuffer, void *voutArr, Nd4jLong *outShapeBuffer, int numOfElemsToReverse = 0) { - auto inArr = reinterpret_cast(vinArr); +static void reverseArray(sd::LaunchContext * context, void const* vinArr, Nd4jLong const*inShapeBuffer, void *voutArr, Nd4jLong const*outShapeBuffer, int numOfElemsToReverse = 0) { + auto inArr = reinterpret_cast(vinArr); auto outArr = reinterpret_cast(voutArr); Nd4jLong inLength = shape::length(inShapeBuffer); @@ -56,7 +56,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { auto idx = sLength - e; - swap(inArr, e, idx); + 
swap(const_cast(inArr), e, idx); } }; samediff::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); @@ -66,7 +66,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in for (auto e = start; e < stop; e++) { auto idx1 = (sLength - e) * inEWS; Nd4jLong idx2 = e * inEWS; - swap(inArr, idx1, idx2); + swap(const_cast(inArr), idx1, idx2); } }; @@ -154,12 +154,12 @@ template static void reverseSequence_(sd::LaunchContext * context, const NDArray* input, const NDArray* seqLengths, NDArray* output, int seqDim, const int batchDim){ int posOfNonUnityDim = -1; - if(input->isVector() || shape::isLikeVector(input->getShapeInfo(), posOfNonUnityDim)) { + if(input->isVector() || shape::isLikeVector(input->shapeInfo(), posOfNonUnityDim)) { if((seqDim == 0 && input->sizeAt(0) == 1) || (batchDim == posOfNonUnityDim)) output->assign(input); else - helpers::reverseArray(context, const_cast(input)->getBuffer(), const_cast(input)->getShapeInfo(), output->getBuffer(), output->getShapeInfo(), seqLengths->e(0)); + helpers::reverseArray(context, const_cast(input)->buffer(), const_cast(input)->shapeInfo(), output->buffer(), output->shapeInfo(), seqLengths->e(0)); } else { @@ -182,7 +182,7 @@ static void reverseSequence_(sd::LaunchContext * context, const NDArray* input, auto inInnerSet = inSubArrsSet.at(i)->allTensorsAlongDimension({seqDim}); auto outInnerSet = outSubArrsSet.at(i)->allTensorsAlongDimension({seqDim}); for(int j = 0; j < inInnerSet.size(); ++j) - helpers::reverseArray(context, inInnerSet.at(j)->getBuffer(), inInnerSet.at(j)->getShapeInfo(), outInnerSet.at(j)->getBuffer(), outInnerSet.at(j)->getShapeInfo(), numOfElemsToReverse); + helpers::reverseArray(context, inInnerSet.at(j)->buffer(), inInnerSet.at(j)->shapeInfo(), outInnerSet.at(j)->buffer(), outInnerSet.at(j)->shapeInfo(), numOfElemsToReverse); } } } @@ -206,12 +206,12 @@ void reverse(sd::LaunchContext * context, const NDArray* input, NDArray* output, for(int i = 0; i < listIn.size(); ++i) 
{ // listIn.size() = listOut.size() subArrIn = listIn.at(i); subArrOut = listOut.at(i); - BUILD_SINGLE_SELECTOR(input->dataType(), helpers::reverseArray, (context, subArrIn->getBuffer(), subArrIn->getShapeInfo(), subArrOut->getBuffer(), subArrOut->getShapeInfo()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), helpers::reverseArray, (context, subArrIn->buffer(), subArrIn->shapeInfo(), subArrOut->buffer(), subArrOut->shapeInfo()), LIBND4J_TYPES); } } BUILD_SINGLE_TEMPLATE(template void reverseSequence_, (sd::LaunchContext * context, const NDArray* input, const NDArray* seqLengths, NDArray* output, int seqDim, const int batchDim), LIBND4J_TYPES); -BUILD_SINGLE_TEMPLATE(template void reverseArray, (sd::LaunchContext * context, void *inArr, Nd4jLong *inShapeBuffer, void *outArr, Nd4jLong *outShapeBuffer, int numOfElemsToReverse), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void reverseArray, (sd::LaunchContext * context, void const*inArr, Nd4jLong const*inShapeBuffer, void* outArr, Nd4jLong const* outShapeBuffer, int numOfElemsToReverse), LIBND4J_TYPES); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp b/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp index 6a854bba8..99a172c02 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp @@ -45,8 +45,8 @@ static void batchToSpace_(const NDArray& input, NDArray& output, const uint crop const int rank = 4; - const Nd4jLong* xShapeInfo = input.getShapeInfo(); - const Nd4jLong* zShapeInfo = output.getShapeInfo(); + const Nd4jLong* xShapeInfo = input.shapeInfo(); + const Nd4jLong* zShapeInfo = output.shapeInfo(); const uint bS = xShapeInfo[1]; const uint iH = xShapeInfo[2]; @@ -118,7 +118,7 @@ static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, 
output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * sizeof(int)); @@ -126,8 +126,8 @@ static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& for (uint j = 1; j <= numOfSpatialDims; ++j) xCoords[j] += crop.e(j - 1, 0); // add crop left - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); - const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); + const auto xOffset = shape::getOffset(input.shapeInfo(), xCoords); z[zOffset] = x[xOffset]; } @@ -211,8 +211,8 @@ static void spaceToBatch_(const NDArray& input, NDArray& output, const uint padB const int rank = 4; - const Nd4jLong* xShapeInfo = input.getShapeInfo(); - const Nd4jLong* zShapeInfo = output.getShapeInfo(); + const Nd4jLong* xShapeInfo = input.shapeInfo(); + const Nd4jLong* zShapeInfo = output.shapeInfo(); const uint bS = zShapeInfo[1]; const uint oH = zShapeInfo[2]; @@ -259,7 +259,7 @@ void spaceToBatch(sd::LaunchContext* context, const NDArray& input, NDArray& out NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), {input.sizeAt(0), output.sizeAt(1) * blockSize, output.sizeAt(2) * blockSize, output.sizeAt(3)}, false); BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatch_, (input, outputRearranged1, padBottom, padTop, padLeft, padRight), LIBND4J_TYPES); - if(output.getBuffer() != outputRearranged1.getBuffer()) + if(output.buffer() != outputRearranged1.buffer()) outputRearranged0.assign(outputRearranged1); } } @@ -309,9 +309,9 @@ static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArra for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * 
sizeof(int)); @@ -331,7 +331,7 @@ static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArra } if (within) - z[zOffset] = x[shape::getOffset(input.getShapeInfo(), xCoords)]; + z[zOffset] = x[shape::getOffset(input.shapeInfo(), xCoords)]; else z[zOffset] = 0.f; } @@ -396,7 +396,7 @@ void spaceToBatchND(sd::LaunchContext* context, const NDArray& input, const NDAr BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchND_, (input, padding, outputRearranged1, numOfSpatialDims), LIBND4J_TYPES); - if(output.getBuffer() != outputRearranged1.getBuffer()) + if(output.buffer() != outputRearranged1.buffer()) outputRearranged0.assign(outputRearranged1); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp b/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp index 5668ea422..b51a4adc9 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp @@ -26,7 +26,7 @@ namespace ops { namespace helpers { template static void _spaceTodepth_(const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - auto input_ptr = reinterpret_cast(input.getBuffer()); + auto input_ptr = reinterpret_cast(input.buffer()); auto output_ptr = reinterpret_cast(output->buffer()); const int batch_size = input.sizeAt(0); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp index dd83a8618..e19eb5dea 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp @@ -36,8 +36,8 @@ Nd4jLong checkIndices_(const NDArray& indices, const NDArray& output, const int const auto x = indices.bufferAsT(); - const auto xShapeInfo = indices.getShapeInfo(); - const auto zShapeInfo = output.getShapeInfo(); + const auto xShapeInfo = indices.shapeInfo(); + const auto zShapeInfo = output.shapeInfo(); const auto xRank = indices.rankOf(); diff --git 
a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp index e2c0f5183..bfd44629c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp @@ -30,10 +30,10 @@ namespace sd { namespace helpers { template - static void softMaxForVector_(void *input, Nd4jLong *inShapeInfo, void *output, Nd4jLong *outShapeInfo) { + static void softMaxForVector_(void const* input, Nd4jLong const* inShapeInfo, void *output, Nd4jLong const* outShapeInfo) { - T* inBuff = reinterpret_cast(input); - T* outBuff = reinterpret_cast(output); + auto inBuff = reinterpret_cast(input); + auto outBuff = reinterpret_cast(output); T max = -DataTypeUtils::max(); T sum = 0.; @@ -80,15 +80,16 @@ namespace sd { throw std::runtime_error("ops::helpers::softMaxForVector function: input and output arrays must be vectors !"); auto xType = input.dataType(); - BUILD_SINGLE_SELECTOR(xType, softMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, softMaxForVector_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); } template - void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen); + void softmax_loop(const T* input, T *output, const Nd4jLong * offsets, Nd4jLong numOfSubArrs, uint32_t tadLen); + #ifdef _OPENMP template <> - FORCEINLINE void softmax_loop(float *input, float *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { -#pragma omp parallel for + FORCEINLINE void softmax_loop(const float* input, float *output, const Nd4jLong * offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { +#pragma omp parallel for default(shared) for (Nd4jLong i = 0; i < numOfSubArrs; i++) { auto inBuff = input + offsets[i]; auto outBuff = output + offsets[i]; @@ -113,7 +114,7 @@ namespace sd { } #else template <> - FORCEINLINE 
void softmax_loop(float *input, float *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { + FORCEINLINE void softmax_loop(const float *input, float *output, const Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { auto inBuff = input + offsets[i]; @@ -143,7 +144,7 @@ namespace sd { template - FORCEINLINE void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { + FORCEINLINE void softmax_loop(const T *input, T *output, const Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { auto inBuff = input + offsets[i]; @@ -180,20 +181,20 @@ namespace sd { if(input.isVector()) { if(rank == 1 || input.sizeAt(dimension) != 1) - softMaxForVector_(input.getBuffer(), input.getShapeInfo(), output.buffer(), output.getShapeInfo()); + softMaxForVector_(input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()); else output = 1.; } else if(input.isSameShapeStrict(output)) { - TadPack tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimension); - Nd4jLong* tadShapeInfo = tadPack.primaryShapeInfo(); - Nd4jLong* tadOffsets = tadPack.primaryOffsets(); + TadPack tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimension); + auto tadShapeInfo = tadPack.primaryShapeInfo(); + auto tadOffsets = tadPack.primaryOffsets(); const uint numOfSubArrs = tadPack.numberOfTads(); const uint tadLen = shape::length(tadShapeInfo); if(shape::elementWiseStride(tadShapeInfo) == 1){ - T *inBuff = input.bufferAsT(); + auto inBuff = input.bufferAsT(); T *outBuff = output.bufferAsT(); softmax_loop(inBuff, outBuff, tadOffsets, numOfSubArrs, tadLen); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/split.cpp b/libnd4j/include/ops/declarable/helpers/cpu/split.cpp index 2e30cdf0a..48c6c4903 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cpu/split.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/split.cpp @@ -33,7 +33,7 @@ namespace helpers { const auto sizeofT = input.sizeOfT(); - T* xBuff = input.bufferAsT(); + auto xBuff = input.bufferAsT(); bool luckCase1 = ((axis == 0 && input.ordering() == 'c') || (axis == input.rankOf() - 1 && input.ordering() == 'f')) && input.ews() == 1; @@ -77,7 +77,7 @@ namespace helpers { for (Nd4jLong i = 0; i < input.lengthOf() / xDim; ++i) { - T* x = xBuff + xDim * i; + auto x = xBuff + xDim * i; for (uint j = 0; j < numSplits; ++j) { const auto zDim = outArrs[j]->sizeAt(axis); @@ -100,8 +100,8 @@ namespace helpers { for (auto i = start; i < stop; i += increment) { - shape::index2coordsCPU(start, i, input.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(input.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, input.shapeInfo(), coords); + const auto xOffset = shape::getOffset(input.shapeInfo(), coords); uint outArrIdx = 0; @@ -113,7 +113,7 @@ namespace helpers { } T* z = outArrs[outArrIdx]->bufferAsT(); - const auto zOffset = shape::getOffset(outArrs[outArrIdx]->getShapeInfo(), coords); + const auto zOffset = shape::getOffset(outArrs[outArrIdx]->shapeInfo(), coords); z[zOffset] = xBuff[xOffset]; coords[axis] = temp; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp index f8fc07201..694ced4cb 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp @@ -47,8 +47,8 @@ static void stack_(const std::vector& inArrs, NDArray& output, c } else { - auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); - Nd4jLong* zTadShapeInfo = zTadPack.primaryShapeInfo(); + auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), 
ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); + auto zTadShapeInfo = zTadPack.primaryShapeInfo(); auto func = PRAGMA_THREADS_FOR { @@ -57,7 +57,7 @@ static void stack_(const std::vector& inArrs, NDArray& output, c void* zBuff = output.bufferWithOffset(zTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(inArrs[0]->getContext(), transform::Assign, - inArrs[i]->getBuffer(), inArrs[i]->getShapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, + inArrs[i]->buffer(), inArrs[i]->shapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, zBuff, zTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/, nullptr, nullptr, nullptr, false/*allowParallelism*/); } @@ -92,17 +92,16 @@ static void unstack_(const NDArray& input, const std::vector& outArrs, } else { - auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); - Nd4jLong* xTadShapeInfo = xTadPack.primaryShapeInfo(); + auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); + auto xTadShapeInfo = xTadPack.primaryShapeInfo(); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* xBuff = input.bufferWithOffset(xTadPack.primaryOffsets()[i]); + auto xBuff = input.bufferWithOffset(xTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input.getContext(), transform::Assign, xBuff, xTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, - outArrs[i]->getBuffer(), outArrs[i]->getShapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/, + outArrs[i]->buffer(), outArrs[i]->shapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/, nullptr, nullptr, nullptr, false/*allowParallelism*/); } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp index 8f2a10bc9..4edb9e2a0 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp @@ -31,8 +31,8 @@ namespace helpers { template static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, const std::vector reps) { - T* gradIBuff = reinterpret_cast(gradI.getBuffer()); - const T* gradOBuff = reinterpret_cast(gradO.getBuffer()); + T* gradIBuff = reinterpret_cast(gradI.buffer()); + auto gradOBuff = reinterpret_cast(gradO.buffer()); const Nd4jLong gradILen = gradI.lengthOf(); const Nd4jLong gradOLen = gradO.lengthOf(); // gradOLen >= gradILen const Nd4jLong gradIEWS = sd::math::nd4j_abs(gradI.ews()); @@ -52,7 +52,7 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c //PRAGMA_OMP_PARALLEL_FOR_SIMD for(Nd4jLong i=0; i(idx) + gradOBuff[i]); } } @@ -60,7 +60,7 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c //PRAGMA_OMP_PARALLEL_FOR_SIMD for(Nd4jLong i=0; i(idx) + gradOBuff[i * gradOEWS]); } } @@ -69,8 +69,8 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c //PRAGMA_OMP_PARALLEL_FOR_SIMD for(Nd4jLong i=0; i(fidx) + gradOBuff[shape::getIndexOffset(i, gradO.getShapeInfo())]); + auto fidx = shape::subArrayIndex(i, gradO.shapeInfo(), gradI.shapeInfo()); + gradI.p(fidx, gradI.e(fidx) + gradOBuff[shape::getIndexOffset(i, gradO.shapeInfo())]); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp index 78b06d71e..fdab43261 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp @@ -42,7 +42,7 @@ namespace helpers { for (size_t d = 0; d < dimsToExclude.size(); ++d) dimsToExclude[d] = d; - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input->getShapeInfo(), dimsToExclude); + const Nd4jLong 
numOfSubArrs = ShapeUtils::getNumOfSubArrs(input->shapeInfo(), dimsToExclude); if (k == 1) { for (Nd4jLong e = 0; e < numOfSubArrs; ++e) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp index e80018348..78268b2dc 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp @@ -67,23 +67,23 @@ static void adaDeltaUpdater_(const NDArray& gradient, const NDArray& initStateMs } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInMsgSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateMsg.getShapeInfo()); - bool bXStMsgSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateMsg.getShapeInfo()); - bool bXInMsdxSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateMsdx.getShapeInfo()); - bool bXStMsdxSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateMsdx.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInMsgSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateMsg.shapeInfo()); + bool bXStMsgSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateMsg.shapeInfo()); + bool bXInMsdxSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateMsdx.shapeInfo()); + bool bXStMsdxSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateMsdx.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < gradient.lengthOf(); i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initMsgOffset = bXInMsgSame ? 
xOffset : shape::getOffset(initStateMsg.getShapeInfo(), coords); - const auto stMsgOffset = bXStMsgSame ? xOffset : shape::getOffset(stateMsg.getShapeInfo(), coords); - const auto initMsdxOffset = bXInMsdxSame ? xOffset : shape::getOffset(initStateMsdx.getShapeInfo(), coords); - const auto stMsdxOffset = bXStMsdxSame ? xOffset : shape::getOffset(stateMsdx.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initMsgOffset = bXInMsgSame ? xOffset : shape::getOffset(initStateMsg.shapeInfo(), coords); + const auto stMsgOffset = bXStMsgSame ? xOffset : shape::getOffset(stateMsg.shapeInfo(), coords); + const auto initMsdxOffset = bXInMsdxSame ? xOffset : shape::getOffset(initStateMsdx.shapeInfo(), coords); + const auto stMsdxOffset = bXStMsdxSame ? xOffset : shape::getOffset(stateMsdx.shapeInfo(), coords); stMsg[stMsgOffset] = rho * initMsg[initMsgOffset] + grad[xOffset] * grad[xOffset] * rhoT; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp index 280597d31..e65f34e72 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp @@ -56,21 +56,21 @@ static void adaGradUpdater_(const NDArray& gradient, const NDArray& initState, N return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initState.getShapeInfo()); - bool bXStSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateH.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), 
initState.shapeInfo()); + bool bXStSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateH.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.getShapeInfo(), coords); - const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateH.getShapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.shapeInfo(), coords); + const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateH.shapeInfo(), coords); st[stOffset] = init[initOffset] + grad[xOffset] * grad[xOffset]; up[zOffset] = (lr * grad[xOffset]) / (math::nd4j_sqrt(st[stOffset]) + epsilon); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp index ae986f901..6c7d0d322 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp @@ -73,23 +73,23 @@ static void adaMaxUpdater_(const NDArray& gradient, const NDArray& initStateU, c return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateU.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateU.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), 
initStateM.getShapeInfo()); - bool bXStMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateU.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateU.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initUOffset = bXInVSame ? xOffset : shape::getOffset(initStateU.getShapeInfo(), coords); - const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.getShapeInfo(), coords); - const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initUOffset = bXInVSame ? xOffset : shape::getOffset(initStateU.shapeInfo(), coords); + const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.shapeInfo(), coords); + const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? 
xOffset : shape::getOffset(stateM.shapeInfo(), coords); //m = B_1 * m + (1-B_1)*grad stM[stMOffset] = beta1 * initM[initMOffset] + grad[xOffset] * (1 - beta1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp index b8eab1e6f..2d670949f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp @@ -75,23 +75,23 @@ static void adamUpdater_(const NDArray& gradient, const NDArray& initStateU, con return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateU.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateU.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateM.getShapeInfo()); - bool bXStMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateU.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateU.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initUOffset = bXInVSame ? 
xOffset : shape::getOffset(initStateU.getShapeInfo(), coords); - const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.getShapeInfo(), coords); - const auto initMOffset = bXInVSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initUOffset = bXInVSame ? xOffset : shape::getOffset(initStateU.shapeInfo(), coords); + const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.shapeInfo(), coords); + const auto initMOffset = bXInVSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.shapeInfo(), coords); stM[stMOffset] = beta1 * initM[initMOffset] + grad[xOffset] * (1 - beta1); stU[stUOffset] = beta2 * initU[initUOffset] + grad[xOffset] * grad[xOffset] * (1 - beta2); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp index 686c22cbe..7cb05075c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp @@ -81,27 +81,27 @@ static void amsGradUpdater_(const NDArray& gradient, const NDArray& initStateV, return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateV.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateV.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateM.getShapeInfo()); - bool bXStMSame = 
shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); - bool bXInHSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateH.getShapeInfo()); - bool bXStHSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateH.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateV.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateV.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); + bool bXInHSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateH.shapeInfo()); + bool bXStHSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateH.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.getShapeInfo(), coords); - const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.getShapeInfo(), coords); - const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); - const auto initHOffset = bXInHSame ? xOffset : shape::getOffset(initStateH.getShapeInfo(), coords); - const auto stHOffset = bXStHSame ? 
xOffset : shape::getOffset(stateH.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.shapeInfo(), coords); + const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.shapeInfo(), coords); + const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.shapeInfo(), coords); + const auto initHOffset = bXInHSame ? xOffset : shape::getOffset(initStateH.shapeInfo(), coords); + const auto stHOffset = bXStHSame ? xOffset : shape::getOffset(stateH.shapeInfo(), coords); stM[stMOffset] = beta1 * initM[initMOffset] + grad[xOffset] * mbeta1; stV[stVOffset] = beta2 * initV[initVOffset] + grad[xOffset] * grad[xOffset] * mbeta2; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp index 82ade0f16..40f9c9407 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp @@ -74,23 +74,23 @@ static void nadamUpdater_(const NDArray& gradient, const NDArray& initStateV, co return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateV.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateV.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateM.getShapeInfo()); - bool bXStMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), 
update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateV.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateV.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.getShapeInfo(), coords); - const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.getShapeInfo(), coords); - const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.shapeInfo(), coords); + const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.shapeInfo(), coords); + const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? 
xOffset : shape::getOffset(stateM.shapeInfo(), coords); auto oneMinusBeta1Grad = grad[xOffset] * mbeta1; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp index 82e21ace7..1d8bb8d45 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp @@ -58,19 +58,19 @@ static void nesterovsUpdater_(const NDArray& gradient, const NDArray& initState, return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initState.getShapeInfo()); - bool bXStSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateV.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initState.shapeInfo()); + bool bXStSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateV.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.getShapeInfo(), coords); - const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateV.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.shapeInfo(), coords); + const auto stOffset = bXStSame ? 
xOffset : shape::getOffset(stateV.shapeInfo(), coords); T prevState = momentum * init[initOffset]; st[stOffset] = prevState - lr * grad[xOffset]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp index a0b9f731e..473b43cf8 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp @@ -57,19 +57,19 @@ static void rmsPropUpdater_(const NDArray& gradient, const NDArray& initState, N return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initState.getShapeInfo()); - bool bXStSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateG.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initState.shapeInfo()); + bool bXStSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateG.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.getShapeInfo(), coords); - const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateG.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initOffset = bXInSame ? 
xOffset : shape::getOffset(initState.shapeInfo(), coords); + const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateG.shapeInfo(), coords); st[stOffset] = init[initOffset] * rmsDecay + grad[xOffset] * grad[xOffset] * (1 - rmsDecay) ; up[zOffset] = (lr * grad[xOffset]) / ( math::nd4j_sqrt(st[stOffset]) + epsilon); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu b/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu index 71eef3386..70ff75b96 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu @@ -56,8 +56,8 @@ namespace helpers { Nd4jLong barnes_row_count(const NDArray* rowP, const NDArray* colP, Nd4jLong N, NDArray& rowCounts) { int* pRowCounts = reinterpret_cast(rowCounts.specialBuffer()); - int const* pRows = reinterpret_cast(rowP->getSpecialBuffer()); - int const* pCols = reinterpret_cast(colP->getSpecialBuffer()); + int const* pRows = reinterpret_cast(rowP->specialBuffer()); + int const* pCols = reinterpret_cast(colP->specialBuffer()); auto stream = rowCounts.getContext()->getCudaStream(); countRowsKernel<<<1, 1, 128, *stream>>>(pRowCounts, pRows, pCols, N); NDArray numElementsArr = rowCounts.sumNumber(); //reduceAlongDimension(reduce::Sum, {}); @@ -146,7 +146,7 @@ namespace helpers { // template static void barnes_symmetrize_(const NDArray* rowP, const NDArray* colP, const NDArray* valP, Nd4jLong N, NDArray* outputRows, NDArray* outputCols, NDArray* outputVals, NDArray* rowCounts) { - int const* pRows = reinterpret_cast(rowP->getSpecialBuffer()); + int const* pRows = reinterpret_cast(rowP->specialBuffer()); int* symRowP = reinterpret_cast(outputRows->specialBuffer()); int* pRowCounts = reinterpret_cast(rowCounts->specialBuffer()); auto stream = outputCols->getContext()->getCudaStream(); @@ -156,8 +156,8 @@ namespace helpers { // outputRows->printBuffer("output rows"); int* symColP = reinterpret_cast(outputCols->specialBuffer()); 
// outputRows->printBuffer("SymRows are"); - int const* pCols = reinterpret_cast(colP->getSpecialBuffer()); - T const* pVals = reinterpret_cast(valP->getSpecialBuffer()); + int const* pCols = reinterpret_cast(colP->specialBuffer()); + T const* pVals = reinterpret_cast(valP->specialBuffer()); T* pOutput = reinterpret_cast(outputVals->specialBuffer()); //std::vector rowCountsV = rowCounts->getBufferAsVector(); auto offsetArr = NDArrayFactory::create('c', {N}); @@ -211,11 +211,11 @@ namespace helpers { template static void barnes_edge_forces_(const NDArray* rowP, NDArray const* colP, NDArray const* valP, int N, NDArray const* data, NDArray* output) { NDArray::prepareSpecialUse({output}, {data, rowP, colP, valP, valP}); - T const* dataP = reinterpret_cast(data->getSpecialBuffer()); - T const* vals = reinterpret_cast(valP->getSpecialBuffer()); + T const* dataP = reinterpret_cast(data->specialBuffer()); + T const* vals = reinterpret_cast(valP->specialBuffer()); T* outputP = reinterpret_cast(output->specialBuffer()); - int const* pRows = reinterpret_cast(rowP->getSpecialBuffer()); - int const* pCols = reinterpret_cast(colP->getSpecialBuffer()); + int const* pRows = reinterpret_cast(rowP->specialBuffer()); + int const* pCols = reinterpret_cast(colP->specialBuffer()); int colCount = data->columns(); //auto shift = 0; auto rowSize = sizeof(T) * colCount; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu index 4243c6e04..c8bc709a0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu @@ -91,7 +91,7 @@ void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alp const auto yType = alpha.dataType(); NDArray::prepareSpecialUse({&output}, {&input, &alpha}); - BUILD_SINGLE_SELECTOR_TWICE(xType, preluCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), 
input.getSpecialBuffer(), input.getSpecialShapeInfo(), alpha.getSpecialBuffer(), alpha.getSpecialShapeInfo(), output.getSpecialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(xType, preluCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), alpha.specialBuffer(), alpha.specialShapeInfo(), output.specialBuffer()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input, &alpha}); manager.synchronize(); @@ -175,7 +175,7 @@ void preluBP(sd::LaunchContext* context, const NDArray& input, const NDArray& al const auto zType = alpha.dataType(); NDArray::prepareSpecialUse({&dLdI, &dLdA}, {&input, &alpha, &dLdO}); - BUILD_SINGLE_SELECTOR_TWICE(xType, preluBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), alpha.getSpecialBuffer(), alpha.getSpecialShapeInfo(), dLdO.getSpecialBuffer(), dLdO.getSpecialShapeInfo(), dLdI.getSpecialBuffer(), dLdI.getSpecialShapeInfo(), dLdA.getSpecialBuffer(), dLdA.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(xType, preluBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), alpha.specialBuffer(), alpha.specialShapeInfo(), dLdO.specialBuffer(), dLdO.specialShapeInfo(), dLdI.specialBuffer(), dLdI.specialShapeInfo(), dLdA.specialBuffer(), dLdA.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&dLdI, &dLdA}, {&input, &alpha, &dLdO}); manager.synchronize(); @@ -313,7 +313,7 @@ void softmax(sd::LaunchContext * context, const NDArray& input, NDArray& output, if(rank == 1 || input.sizeAt(dimension) != 1) { NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), softMaxForVectorCudaLauncher, (context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), 
FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), softMaxForVectorCudaLauncher, (context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); } else @@ -321,15 +321,15 @@ void softmax(sd::LaunchContext * context, const NDArray& input, NDArray& output, } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), {dimension}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), {dimension}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {dimension}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), {dimension}); const int threadsPerBlock = MAX_NUM_THREADS / 4; const int blocksPerGrid = packZ.numberOfTads(); const int sharedMem = input.sizeOfT() * threadsPerBlock + 512; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), softMaxCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), packX.specialShapeInfo(), packX.specialOffsets(), output.specialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), softMaxCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), packX.specialShapeInfo(), packX.specialOffsets(), output.specialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); // auto maxAlongDim = const_cast(input).reduceAlongDimension(reduce::Max, {dimension}, true); @@ -442,7 +442,7 @@ void logSoftmax(sd::LaunchContext * context, const NDArray& input, NDArray& outp if(input.isVector()) { if(rank == 1 || input.sizeAt(dimension) != 1) { - BUILD_SINGLE_SELECTOR(input.dataType(), 
logSoftMaxForVectorCudaLauncher, (context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), logSoftMaxForVectorCudaLauncher, (context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer()), FLOAT_TYPES); input.tickReadDevice(); } else @@ -560,9 +560,9 @@ void softmaxDerivative(sd::LaunchContext * context, const NDArray& input, NDArra const int rank = input.rankOf(); int temp; - if(shape::isCommonVector(input.getShapeInfo(), temp)) { + if(shape::isCommonVector(input.shapeInfo(), temp)) { - BUILD_SINGLE_SELECTOR(input.dataType(), softMaxDerivForVectorCudaLauncher, (context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), softMaxDerivForVectorCudaLauncher, (context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer()), FLOAT_TYPES); input.tickReadDevice(); } else { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu b/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu index 0878a1c77..18474f2c7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu @@ -126,7 +126,7 @@ void addBias(sd::graph::Context& block, const NDArray& input, const NDArray& bia if (input.rankOf() == 2 && bias.rankOf() == 1 && input.ordering() == 'c' && output.ordering() == 'c' && input.ews() == 1 && bias.ews() == 1 && input.sizeAt(1) == bias.sizeAt(0)) { BUILD_DOUBLE_SELECTOR(input.dataType(), bias.dataType(), addBias2DCudaLauncher, - (block.launchContext()->getCudaStream(), input.getSpecialBuffer(), bias.getSpecialBuffer(), output.specialBuffer(), input.sizeAt(0), bias.sizeAt(0)), + (block.launchContext()->getCudaStream(), input.specialBuffer(), bias.specialBuffer(), output.specialBuffer(), input.sizeAt(0), 
bias.sizeAt(0)), FLOAT_TYPES, FLOAT_TYPES); } else { // default case @@ -136,7 +136,7 @@ void addBias(sd::graph::Context& block, const NDArray& input, const NDArray& bia BUILD_DOUBLE_SELECTOR(input.dataType(), bias.dataType(), addBiasCudaLauncher, - (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), bias.getSpecialBuffer(), bias.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), isNCHW), + (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), bias.specialBuffer(), bias.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), isNCHW), FLOAT_TYPES, FLOAT_TYPES); } NDArray::registerSpecialUse({&output}, {&input, &bias}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu index 0b3681663..9ce00f318 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu @@ -81,8 +81,8 @@ static _CUDA_H void adjustHueCudaLauncher(const int blocksPerGrid, const int thr //////////////////////////////////////////////////////////////////////// void adjustHue(sd::LaunchContext* context, const NDArray *input, const NDArray* deltaScalarArr, NDArray *output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -92,7 +92,7 @@ void adjustHue(sd::LaunchContext* context, const NDArray *input, const NDArray* PointersManager 
manager(context, "adjustHue"); NDArray::prepareSpecialUse({output}, {input, deltaScalarArr}); - BUILD_SINGLE_SELECTOR(input->dataType(), adjustHueCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, deltaScalarArr, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), adjustHueCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, deltaScalarArr, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, deltaScalarArr}); manager.synchronize(); @@ -173,8 +173,8 @@ static void _adjust_hue_single(sd::LaunchContext * context, NDArray *array, NDAr adjustHueSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta); } else { // TODO: check this one - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {1, 2}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {1, 2}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2}); auto tadLength = shape::length(packX.primaryShapeInfo()); @@ -195,8 +195,8 @@ static void _adjust_hue_batch(sd::LaunchContext * context, NDArray *array, NDArr BUILD_SINGLE_SELECTOR(xType, _adjust_hue_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES); } else { // TODO: check this one - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {0, 2, 3}); - auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {0, 2, 3}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3}); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu index f2da480cb..fd413f8cd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu @@ -83,8 +83,8 @@ static _CUDA_H void adjustSaturationCudaLauncher(const int blocksPerGrid, const //////////////////////////////////////////////////////////////////////// void adjustSaturation(sd::LaunchContext* context, const NDArray *input, const NDArray* factorScalarArr, NDArray *output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -94,7 +94,7 @@ void adjustSaturation(sd::LaunchContext* context, const NDArray *input, const ND PointersManager manager(context, "adjustSaturation"); NDArray::prepareSpecialUse({output}, {input, factorScalarArr}); - BUILD_SINGLE_SELECTOR(input->dataType(), adjustSaturationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, factorScalarArr, dimC), FLOAT_TYPES); + 
BUILD_SINGLE_SELECTOR(input->dataType(), adjustSaturationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, factorScalarArr, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, factorScalarArr}); manager.synchronize(); @@ -164,8 +164,8 @@ static void _adjust_saturation_single(sd::LaunchContext * context, NDArray *arra if (isNHWC) { adjustSaturationSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {1, 2}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {1, 2}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2}); auto tadLength = shape::length(packX.primaryShapeInfo()); @@ -185,8 +185,8 @@ static void _adjust_saturation_batch(sd::LaunchContext * context, NDArray *array BUILD_SINGLE_SELECTOR(xType, _adjust_saturation_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES); } else { // TODO: check this one - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {0, 2, 3}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {0, 2, 3}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3}); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu index b5447b411..40540f65d 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu @@ -87,9 +87,9 @@ void bgemm(const std::vector& vA, const std::vector& vB, std std::vector pAbuffs(bS), pBbuffs(bS), pCbuffs(bS); for(int i = 0; i < bS; ++i) { - pAbuffs[i] = pA[i]->getSpecialBuffer(); - pBbuffs[i] = pB[i]->getSpecialBuffer(); - pCbuffs[i] = pC[i]->getSpecialBuffer(); + pAbuffs[i] = pA[i]->specialBuffer(); + pBbuffs[i] = pB[i]->specialBuffer(); + pCbuffs[i] = pC[i]->specialBuffer(); } sd::LaunchContext* context = vA[0]->getContext(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu index 2daac26c3..791953ab7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu @@ -201,7 +201,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc // std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), axes); - // auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimsToExclude); + // auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsToExclude); // auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsToExclude); // const int threadsPerBlock = MAX_NUM_THREADS / 2; @@ -210,7 +210,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc // PointersManager manager(input->getContext(), "batchnorm"); // NDArray::prepareSpecialUse({output}, {input, mean, variance, gamma, beta}); - // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), 
mean->getSpecialBuffer(), mean->getSpecialShapeInfo(), variance->getSpecialBuffer(), variance->getSpecialShapeInfo(), gamma ? gamma->getSpecialBuffer() : nullptr, gamma ? gamma->getSpecialShapeInfo() : nullptr, beta ? beta->getSpecialBuffer() : nullptr, beta ? beta->getSpecialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES); + // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES); // NDArray::registerSpecialUse({output}, {input, mean, variance, gamma, beta}); // manager.synchronize(); @@ -224,7 +224,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc const int* dims = reinterpret_cast(manager.replicatePointer(axes.data(), axes.size() * sizeof(int))); NDArray::prepareSpecialUse({output}, {input, mean, variance, gamma, beta}); - BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher2, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), mean->getSpecialBuffer(), mean->getSpecialShapeInfo(), variance->getSpecialBuffer(), variance->getSpecialShapeInfo(), gamma ? gamma->getSpecialBuffer() : nullptr, gamma ? gamma->getSpecialShapeInfo() : nullptr, beta ? beta->getSpecialBuffer() : nullptr, beta ? 
beta->getSpecialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), axes.size(), dims, epsilon), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher2, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), axes.size(), dims, epsilon), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, mean, variance, gamma, beta}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu b/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu index f1407f9e8..a18ec1fda 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu @@ -181,7 +181,7 @@ void betaInc(sd::LaunchContext* context, const NDArray& a, const NDArray& b, con PointersManager manager(context, "betaInc"); NDArray::prepareSpecialUse({&output}, {&a, &b, &x}); - BUILD_SINGLE_SELECTOR(xType, betaIncForArrayCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), a.getSpecialBuffer(), a.getSpecialShapeInfo(), b.getSpecialBuffer(), b.getSpecialShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, betaIncForArrayCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), a.specialBuffer(), a.specialShapeInfo(), b.specialBuffer(), b.specialShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&a, &b, &x}); 
manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu b/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu index 878ce3a6a..62f60cc73 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu @@ -193,7 +193,7 @@ void col2im(sd::LaunchContext& context, const NDArray& col, NDArray& im, const i const int sharedMem = col.rankOf() * sizeof(uint) * threadsPerBlock + 256; NDArray::prepareSpecialUse({&im}, {&col}); - BUILD_SINGLE_SELECTOR(im.dataType(), col2imCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context.getCudaStream(), col.getSpecialBuffer(), col.getSpecialShapeInfo(), im.specialBuffer(), im.specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(im.dataType(), col2imCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context.getCudaStream(), col.specialBuffer(), col.specialShapeInfo(), im.specialBuffer(), im.specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({&im}, {&col}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu b/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu index ecc1a348e..5de20c57f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu @@ -24,7 +24,7 @@ namespace sd { namespace ops { namespace helpers { - void decodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output) { + void decodeBitmap(sd::LaunchContext* context, const NDArray* input, NDArray* output) { auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {input}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu b/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu index 138970816..6b5af0df4 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu @@ -113,7 +113,7 @@ namespace sd { sd::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); } - static void encodeThresholdP3_(void *dx, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ + static void encodeThresholdP3_(void *dx, const Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ auto stream = LaunchContext::defaultContext()->getCudaStream(); int blockSize = 512; @@ -137,7 +137,7 @@ namespace sd { auto xType = updates.dataType(); NDArray::prepareSpecialUse({&tmp}, {&updates}); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, LaunchContext::defaultContext()->getCudaStream(), updates.getSpecialBuffer(), updates.lengthOf(), tmp.specialBuffer(), threshold), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, LaunchContext::defaultContext()->getCudaStream(), updates.specialBuffer(), updates.lengthOf(), tmp.specialBuffer(), threshold), FLOAT_TYPES); NDArray::registerSpecialUse({&tmp}, {&updates}); return std::move(tmp); @@ -199,9 +199,9 @@ namespace sd { // filling offsets encodeThresholdP2Int_(reinterpret_cast(dptr), - reinterpret_cast(blocks.getSpecialBuffer()), + reinterpret_cast(blocks.specialBuffer()), numBlocks, - reinterpret_cast(offsets.getSpecialBuffer())); + reinterpret_cast(offsets.specialBuffer())); NDArray::registerSpecialUse({&blocks, &offsets}, {}); pm.synchronize(); @@ -209,7 +209,7 @@ namespace sd { encodeThresholdP3_(updates.specialBuffer(), updates.shapeInfo(), - reinterpret_cast(offsets.getSpecialBuffer()), + reinterpret_cast(offsets.specialBuffer()), updates.lengthOf(), reinterpret_cast(encoded.specialBuffer())); @@ -223,7 +223,7 @@ namespace sd { auto xType = updates.dataType(); NDArray::prepareSpecialUse({&updates}, {&encoded}); - BUILD_SINGLE_SELECTOR(xType, decoderKernelGeneric, (launchDims, 
LaunchContext::defaultContext()->getCudaStream(), encoded.getSpecialBuffer(), updates.lengthOf(), updates.specialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, decoderKernelGeneric, (launchDims, LaunchContext::defaultContext()->getCudaStream(), encoded.specialBuffer(), updates.lengthOf(), updates.specialBuffer()), FLOAT_TYPES); NDArray::registerSpecialUse({&updates}, {&encoded}); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/concat.cu b/libnd4j/include/ops/declarable/helpers/cuda/concat.cu index 10e1d132c..cbcd35ffe 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/concat.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/concat.cu @@ -36,7 +36,7 @@ namespace helpers { /////////////////////////////////////////////////////////////////// template -__global__ static void concatCuda(void* pVx, void* pxShapeInfo, void* vz, Nd4jLong* zShapeInfo, const int axis) { +__global__ static void concatCuda(void* pVx, void* pxShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const int axis) { T* z = reinterpret_cast(vz); __shared__ Nd4jLong zLen, totalThreads; @@ -76,11 +76,10 @@ __global__ static void concatCuda(void* pVx, void* pxShapeInfo, void* vz, Nd4jL /////////////////////////////////////////////////////////////////// template __host__ static void concatCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, - void* pVx, void* pxShapeInfo, void* vz, Nd4jLong* zShapeInfo, const int axis) { + void* pVx, void* pxShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const int axis) { concatCuda<<>>(pVx, pxShapeInfo, vz, zShapeInfo, axis); } -BUILD_SINGLE_TEMPLATE(template void concatCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* vz, Nd4jLong* zShapeInfo, const int axis), LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// void concat(sd::LaunchContext * 
context, const std::vector& inArrs, NDArray& output, const int axis) { @@ -102,11 +101,11 @@ void concat(sd::LaunchContext * context, const std::vector& inAr if(luckCase1) { // for example {1,10} + {2,10} + {3,10} = {6, 10} order c; or {10,1} + {10,2} + {10,3} = {10, 6} order f - void* z = static_cast(output.getSpecialBuffer()); + void* z = static_cast(output.specialBuffer()); for (uint i = 0; i < numOfInArrs; ++i) { const auto memAmountToCopy = inArrs[i]->lengthOf() * sizeofT; - cudaMemcpyAsync(z, static_cast(inArrs[i]->getSpecialBuffer()), memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); + cudaMemcpyAsync(z, reinterpret_cast(inArrs[i]->specialBuffer()), memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); z = static_cast(z) + memAmountToCopy; } @@ -134,7 +133,7 @@ void concat(sd::LaunchContext * context, const std::vector& inAr // if(!areInputsContin || !allSameOrder) // break; - // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->getShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->shapeInfo()); // } // } @@ -142,16 +141,16 @@ void concat(sd::LaunchContext * context, const std::vector& inAr // if(luckCase2) { // for example {2,1,3} + {2,5,3} + {2,10,3} = {2,16,3}, here axis 1 shoud have stride = 1 for all inputs arrays and output array - // const auto zStep = shape::strideOverContigAxis(axis, output.getShapeInfo()); + // const auto zStep = shape::strideOverContigAxis(axis, output.shapeInfo()); // for (uint i = 0; i < output.lengthOf() / output.sizeAt(axis); ++i) { // const auto iShift = i * sizeofT; - // void* z = static_cast(output.getSpecialBuffer()) + zStep * iShift; + // void* z = static_cast(output.specialBuffer()) + zStep * iShift; // for (uint j = 0; j < numOfInArrs; ++j) { // const auto xDim = inArrs[j]->sizeAt(axis); - // void* x = static_cast(inArrs[j]->getSpecialBuffer()) + strideOfContigStride[j] * iShift; + // void* x = 
static_cast(inArrs[j]->specialBuffer()) + strideOfContigStride[j] * iShift; // const auto memSizeToCopy = xDim * sizeofT; // cudaMemcpyAsync(z, x, memSizeToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); // z = static_cast(z) + memSizeToCopy; @@ -168,12 +167,12 @@ void concat(sd::LaunchContext * context, const std::vector& inAr const int sharedMem = 512; // prepare arrays of pointers on buffers and shapes - std::vector hInBuffers(numOfInArrs); - std::vector hInShapeInfo(numOfInArrs); + std::vector hInBuffers(numOfInArrs); + std::vector hInShapeInfo(numOfInArrs); for(int i = 0; i < numOfInArrs; ++i) { - hInBuffers[i] = inArrs[i]->getSpecialBuffer(); - hInShapeInfo[i] = inArrs[i]->getSpecialShapeInfo(); + hInBuffers[i] = inArrs[i]->specialBuffer(); + hInShapeInfo[i] = inArrs[i]->specialShapeInfo(); } PointersManager manager(context, "helpers::concat"); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu index edb7538d4..dfa86124a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu @@ -38,7 +38,7 @@ namespace helpers { } template - __global__ static void confusionFunctorKernel(Nd4jLong* labelsBuffer, Nd4jLong* predictionBuffer, Nd4jLong bufferLength, void const* weightsBuffer, void* outputBuffer, Nd4jLong* tadShape, Nd4jLong* tadOffsets) { + __global__ static void confusionFunctorKernel(Nd4jLong* labelsBuffer, Nd4jLong* predictionBuffer, Nd4jLong bufferLength, void const* weightsBuffer, void* outputBuffer, const Nd4jLong* tadShape, const Nd4jLong* tadOffsets) { __shared__ int arrIdx, blocksPerArr; __shared__ T *z; __shared__ T const* w; @@ -80,7 +80,7 @@ namespace helpers { if (err != 0) throw sd::cuda_exception::build("Cannot allocate memory for labels long buffer", err); // copy with type conversion - copyBuffers<<<256, 512, 1024, *stream>>>(labelsLongBuffer, labels->getSpecialBuffer(), 
labels->lengthOf()); + copyBuffers<<<256, 512, 1024, *stream>>>(labelsLongBuffer, labels->specialBuffer(), labels->lengthOf()); } if (predictionLongBuffer == nullptr) { @@ -88,22 +88,22 @@ namespace helpers { if (err != 0) throw sd::cuda_exception::build("Cannot allocate memory for predictions long buffer", err); // copy with type conversion - copyBuffers<<<256, 512, 1024, *stream>>>(predictionLongBuffer, predictions->getSpecialBuffer(), predictions->lengthOf()); + copyBuffers<<<256, 512, 1024, *stream>>>(predictionLongBuffer, predictions->specialBuffer(), predictions->lengthOf()); } auto bufferLength = labels->lengthOf(); dim3 launchDims(32, 32, 1024); - confusionFunctorKernel<<>>(labelsLongBuffer, predictionLongBuffer, bufferLength, weights != nullptr? weights->getSpecialBuffer():nullptr, output->specialBuffer(), pack.specialShapeInfo(), pack.specialOffsets()); + confusionFunctorKernel<<>>(labelsLongBuffer, predictionLongBuffer, bufferLength, weights != nullptr? weights->specialBuffer():nullptr, output->specialBuffer(), pack.specialShapeInfo(), pack.specialOffsets()); manager.synchronize(); - if (predictionLongBuffer != predictions->getSpecialBuffer()) { + if (predictionLongBuffer != predictions->specialBuffer()) { cudaError_t err = cudaFree(predictionLongBuffer); if (err != 0) throw sd::cuda_exception::build("Cannot deallocate memory for predictions long buffer", err); } - if (labelsLongBuffer != labels->getSpecialBuffer()) { + if (labelsLongBuffer != labels->specialBuffer()) { cudaError_t err = cudaFree(labelsLongBuffer); if (err != 0) throw sd::cuda_exception::build("Cannot deallocate memory for labels long buffer", err); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu index d751c2b1e..80df76c91 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu @@ -121,7 
+121,7 @@ void ConvolutionUtils::col2vol(sd::graph::Context& block, const NDArray& col, ND const int sharedMem = col.rankOf() * sizeof(uint) * threadsPerBlock + 256; NDArray::prepareSpecialUse({&vol}, {&col}); - BUILD_SINGLE_SELECTOR(vol.dataType(), col2volCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), col.getSpecialBuffer(), col.getSpecialShapeInfo(), vol.specialBuffer(), vol.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(vol.dataType(), col2volCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), col.specialBuffer(), col.specialShapeInfo(), vol.specialBuffer(), vol.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({&vol}, {&col}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu index eb336cb76..c146be7bf 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu @@ -118,7 +118,7 @@ static __global__ void avgPooling2dCuda(const void *vx, const Nd4jLong *xShapeIn ////////////////////////////////////////////////////////////////////////// template -static void avgPooling2dCudaLauncher(sd::LaunchContext & block, void *vx, Nd4jLong *vxShapeInfo, void *vz, Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { +static void avgPooling2dCudaLauncher(sd::LaunchContext & block, const void *vx, const Nd4jLong *vxShapeInfo, void *vz, const Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { avgPooling2dCuda<<<512, 512, 4192, 
*block.getCudaStream()>>>(vx, vxShapeInfo, vz, vzShapeInfo, kH, kW, sH, sW, pH, pW, dH, dW, extraParam0); } @@ -209,7 +209,7 @@ static __global__ void pnormPooling2dCuda(const void *vx, const Nd4jLong *xShape ////////////////////////////////////////////////////////////////////////// template -static void pnormPooling2dCudaLauncher(sd::LaunchContext & block, void *vx, Nd4jLong *vxShapeInfo, void *vz, Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { +static void pnormPooling2dCudaLauncher(sd::LaunchContext & block, const void *vx, const Nd4jLong *vxShapeInfo, void *vz, const Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { pnormPooling2dCuda<<<512, 512, 4192, *block.getCudaStream()>>>(vx, vxShapeInfo, vz, vzShapeInfo, kH, kW, sH, sW, pH, pW, dH, dW, extraParam0); } @@ -303,7 +303,7 @@ static __global__ void maxPooling2dCuda(const void *vx, const Nd4jLong *xShapeIn ////////////////////////////////////////////////////////////////////////// template -static void maxPooling2dCudaLauncher(sd::LaunchContext & block, void *vx, Nd4jLong *vxShapeInfo, void *vz, Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { +static void maxPooling2dCudaLauncher(sd::LaunchContext & block, const void *vx, const Nd4jLong *vxShapeInfo, void *vz, const Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { maxPooling2dCuda<<<512, 512, 4192, *block.getCudaStream()>>>(vx, vxShapeInfo, vz, vzShapeInfo, kH, kW, sH, sW, pH, pW, dH, dW, extraParam0); } @@ -315,15 +315,15 @@ void ConvolutionUtils::pooling2d(sd::graph::Context& block, const NDArray& input switch 
(poolingMode) { case MAX_POOL: { - BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), maxPooling2dCudaLauncher, (*block.launchContext(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), maxPooling2dCudaLauncher, (*block.launchContext(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); } break; case AVG_POOL: { - BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), avgPooling2dCudaLauncher, (*block.launchContext(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), avgPooling2dCudaLauncher, (*block.launchContext(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); } break; case PNORM_POOL: { - BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), pnormPooling2dCudaLauncher, (*block.launchContext(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), pnormPooling2dCudaLauncher, (*block.launchContext(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); } break; default: diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu index 26808ad4c..62f4787dd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu @@ -178,7 +178,7 @@ void ConvolutionUtils::pooling2dBP(sd::graph::Context& block, const NDArray& inp const int sharedMem = gradO.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&input, &gradO}); - BUILD_SINGLE_SELECTOR(input.dataType(), pooling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), pooling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&input, &gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu index 93e372a7e..0a3bfc9b6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu @@ -170,7 +170,7 @@ void ConvolutionUtils::pooling3d(sd::graph::Context& block, const NDArray& input const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kD, kH, kW, sD, sH, sW, 
pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu index 51b48bc23..fd78bb80b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu @@ -192,7 +192,7 @@ void ConvolutionUtils::pooling3dBP(sd::graph::Context& block, const NDArray& inp const int sharedMem = gradO.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&input, &gradO}); - BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&input, &gradO}); manager.synchronize(); diff --git 
a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu index be9fab0be..ee1fa8924 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu @@ -87,7 +87,7 @@ void ConvolutionUtils::upsampling2d(sd::graph::Context& block, const NDArray& in const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), upsampling2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorH, factorW, isNCHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), upsampling2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorH, factorW, isNCHW), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu index ce393d279..c6864c48a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu @@ -93,7 +93,7 @@ void ConvolutionUtils::upsampling2dBP(sd::graph::Context& block, const NDArray& const int sharedMem = gradI.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&gradO}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), 
gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCHW), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu index 6f15a27d6..1acb4307f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu @@ -88,7 +88,7 @@ void ConvolutionUtils::upsampling3d(sd::graph::Context& block, const NDArray& in const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), upsampling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorD, factorH, factorW, isNCDHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), upsampling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorD, factorH, factorW, isNCDHW), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu index f9eb56bec..5a1e08c07 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu @@ -96,7 +96,7 @@ void ConvolutionUtils::upsampling3dBP(sd::graph::Context& block, const NDArray& const int sharedMem = gradI.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&gradO}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCDHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCDHW), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu index ebe0ec26e..c2c5fb3ef 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu @@ -101,7 +101,7 @@ void ConvolutionUtils::vol2col(sd::graph::Context& block, const NDArray& vol, ND const int sharedMem = col.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&col}, {&vol}); - BUILD_SINGLE_SELECTOR(vol.dataType(), vol2colCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), vol.getSpecialBuffer(), vol.getSpecialShapeInfo(), col.specialBuffer(), col.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(vol.dataType(), vol2colCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, 
block.launchContext()->getCudaStream(), vol.specialBuffer(), vol.specialShapeInfo(), col.specialBuffer(), col.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({&col}, {&vol}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/cross.cu b/libnd4j/include/ops/declarable/helpers/cuda/cross.cu index d7694641c..8de4f65fd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/cross.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/cross.cu @@ -111,7 +111,7 @@ void crossBatched(sd::LaunchContext* context, NDArray *x, NDArray *y, NDArray *z PointersManager manager(context, "cross"); NDArray::prepareSpecialUse({z}, {x, y}); - BUILD_SINGLE_SELECTOR(x->dataType(), crossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), y->getSpecialBuffer(), y->getSpecialShapeInfo(), z->specialBuffer(), z->specialShapeInfo()), NUMERIC_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), crossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->specialBuffer(), x->specialShapeInfo(), y->specialBuffer(), y->specialShapeInfo(), z->specialBuffer(), z->specialShapeInfo()), NUMERIC_TYPES); NDArray::registerSpecialUse({z}, {x, y}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu b/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu index fc3b04ee8..35d8bf033 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu @@ -25,9 +25,9 @@ namespace ops { namespace helpers { template - static _CUDA_G void depthToSpaceKernel(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, const int block_size, const bool isNHWC) { - T *input_ptr = reinterpret_cast(vx); - T *output_ptr = reinterpret_cast(vz); + static _CUDA_G void depthToSpaceKernel(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const 
Nd4jLong *zShapeInfo, const int block_size, const bool isNHWC) { + auto input_ptr = reinterpret_cast(vx); + auto output_ptr = reinterpret_cast(vz); const int batch_size = shape::sizeAt(xShapeInfo, 0); const int input_depth = isNHWC ? shape::sizeAt(xShapeInfo, 3) : shape::sizeAt(xShapeInfo, 1); @@ -89,7 +89,7 @@ namespace helpers { template static void __depthToSpace(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - depthToSpaceKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); + depthToSpaceKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.specialBuffer(), input.specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); } void _depthToSpace(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { @@ -100,9 +100,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(xType, __depthToSpace, (context, input, output, block_size, isNHWC), LIBND4J_TYPES); NDArray::registerSpecialUse({output}, {&input}); } - - BUILD_SINGLE_TEMPLATE(template void __depthToSpace, (sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC);, LIBND4J_TYPES); - } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu b/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu index a6d06be17..ff217bdb6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu @@ -66,7 +66,7 @@ void diGamma(sd::LaunchContext* context, const NDArray& x, NDArray& z) { int blocksPerGrid = (z.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({&z}, {&x}); - BUILD_SINGLE_SELECTOR(x.dataType(), diGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), 
x.getSpecialBuffer(), x.getSpecialShapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x.dataType(), diGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.specialBuffer(), x.specialShapeInfo(), z.specialBuffer(), z.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&z}, {&x}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/diag.cu b/libnd4j/include/ops/declarable/helpers/cuda/diag.cu index 87fd2aa98..f011f4095 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/diag.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/diag.cu @@ -33,7 +33,7 @@ namespace helpers { // inputLength - length for input tensor // template -static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputShape, void const* inputBuffer, Nd4jLong* inputShape, Nd4jLong inputLength) { +static __global__ void diagFunctorKernel(void* outputBuffer, const Nd4jLong* outputShape, void const* inputBuffer, const Nd4jLong* inputShape, Nd4jLong inputLength) { __shared__ T *z; __shared__ T const* x; __shared__ Nd4jLong outputLength; @@ -65,7 +65,7 @@ static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputSha // inputLength - given length for input tensor // template - static __global__ void diagPartFunctorKernel(void* outputBuffer, Nd4jLong* outputShape, void const* inputBuffer, Nd4jLong* inputShape, Nd4jLong outputLength, Nd4jLong inputLength) { + static __global__ void diagPartFunctorKernel(void* outputBuffer, const Nd4jLong* outputShape, void const* inputBuffer, const Nd4jLong* inputShape, Nd4jLong outputLength, Nd4jLong inputLength) { __shared__ T *z; __shared__ T const* x; @@ -96,7 +96,7 @@ static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputSha dim3 launchDims(256, 512, 8192); if (!input->isActualOnDeviceSide()) input->syncToDevice(); - diagFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->getSpecialBuffer(), 
input->getSpecialShapeInfo(), inputLength); + diagFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), inputLength); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -121,7 +121,7 @@ static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputSha if (!input->isActualOnDeviceSide()) input->syncToDevice(); - diagPartFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), outLen, inLen); + diagPartFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), outLen, inLen); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu b/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu index c05b5fb6d..0d25552c9 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu @@ -122,7 +122,7 @@ void dilation2d(sd::LaunchContext* context, NDArray *input, NDArray *weights, ND const int sharedMem = (weights->rankOf() + output->rankOf()) * sizeof(int) * threadsPerBlock + 128; NDArray::prepareSpecialUse({output}, {input, weights}); - BUILD_SINGLE_SELECTOR_TWICE(input->dataType(), dilation2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), weights->getSpecialBuffer(), weights->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input->dataType(), dilation2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), 
weights->specialBuffer(), weights->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, weights}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu b/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu index aee79caa7..4e0fdb377 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu @@ -29,7 +29,7 @@ namespace ops { namespace helpers { template - static __global__ void dropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong* outputShape, double probVal, int inLen, sd::graph::RandomGenerator* nodeRng) { + static __global__ void dropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong const* outputShape, double probVal, int inLen, sd::graph::RandomGenerator* nodeRng) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; T const* input = reinterpret_cast(inputBuf); @@ -62,7 +62,7 @@ namespace helpers { throw cuda_exception::build("helpers::dropoutSimple: Cannot set up device memory for random generator.", err); } - dropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, inLen, dRandom); + dropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, inLen, dRandom); err = cudaFree(dRandom); if (err) { throw cuda_exception::build("helpers::dropoutSimple: Cannot deallocate device memory for random generator.", err); @@ -124,7 +124,7 @@ namespace helpers { /////////////////////////////////// backrpopagations /////////////////////////////////////////////// template - static __global__ void dropoutBPKernel(void* outputBuf, 
Nd4jLong* outputShape, void* gradOutBuf, Nd4jLong* gradOutShape, double probValue) { + static __global__ void dropoutBPKernel(void* outputBuf, Nd4jLong const* outputShape, void* gradOutBuf, Nd4jLong const* gradOutShape, double probValue) { __shared__ T* output; __shared__ T* input; __shared__ int len; @@ -165,7 +165,7 @@ namespace helpers { } template - static __global__ void alphaDropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong* outputShape, double probValue, double alpha, double alpha1, double beta, int inLen, sd::graph::RandomGenerator* nodeRng) { + static __global__ void alphaDropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong const* outputShape, double probValue, double alpha, double alpha1, double beta, int inLen, sd::graph::RandomGenerator* nodeRng) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; T const* input = reinterpret_cast(inputBuf); @@ -191,7 +191,7 @@ namespace helpers { throw cuda_exception::build("helpers::alphaDropoutSimple: Cannot set up device memory for random generator.", err); } - alphaDropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, alpha, alpha1, beta, output->lengthOf(), dRandom); + alphaDropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, alpha, alpha1, beta, output->lengthOf(), dRandom); err = cudaFree(dRandom); if (err) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu index a80d838be..6f29995d3 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu @@ -27,9 +27,9 @@ namespace sd { template - static _CUDA_G void 
dynamicPartitionScalarKernel(void *vx, Nd4jLong *xShapeInfo, void *vi, Nd4jLong *iShapeInfo, void **vz, Nd4jLong **zShapeInfos, const Nd4jLong numOutputs) { - auto x = reinterpret_cast(vx); - auto i = reinterpret_cast(vi); + static _CUDA_G void dynamicPartitionScalarKernel(const void *vx, const Nd4jLong *xShapeInfo, const void *vi, const Nd4jLong *iShapeInfo, void **vz, Nd4jLong **zShapeInfos, const Nd4jLong numOutputs) { + auto x = reinterpret_cast(vx); + auto i = reinterpret_cast(vi); auto xLength = shape::length(xShapeInfo); auto iLength = shape::length(iShapeInfo); @@ -85,9 +85,9 @@ namespace sd { } template - static _CUDA_G void dynamicPartitionTadKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, Nd4jLong xLength, void *vindices, Nd4jLong *iShapeInfo, Nd4jLong iLength, void **vz, Nd4jLong **zTadShapeInfos, Nd4jLong **zTadOffsets, Nd4jLong numOutputs) { - auto x = reinterpret_cast(vx); - auto indices = reinterpret_cast(vindices); + static _CUDA_G void dynamicPartitionTadKernel(const void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, Nd4jLong xLength, const void *vindices, const Nd4jLong *iShapeInfo, Nd4jLong iLength, void **vz, Nd4jLong **zTadShapeInfos, Nd4jLong **zTadOffsets, Nd4jLong numOutputs) { + auto x = reinterpret_cast(vx); + auto indices = reinterpret_cast(vindices); // we run things in blocks, 1 partition per block of threads for (int i = blockIdx.x; i < numOutputs; i += gridDim.x) { @@ -124,11 +124,11 @@ namespace sd { for (int i = sourceDimsLen; i > 0; i--) sourceDims[sourceDimsLen - i] = input->rankOf() - i; //compute tad array for given dimensions - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), sourceDims); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), sourceDims); std::vector outBuffers(outSize); - std::vector tadShapes(outSize); - std::vector tadOffsets(outSize); + std::vector tadShapes(outSize); + std::vector tadOffsets(outSize); 
std::vector numTads(outSize); // fill up dimensions array for before kernel for (unsigned int i = 0; i < outSize; i++) { @@ -140,9 +140,9 @@ namespace sd { for (int k = 1; k < r; k++) outDims[k - 1] = k; - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(outputList.at(i)->getShapeInfo(), outDims); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(outputList.at(i)->shapeInfo(), outDims); - outBuffers[i] = outputList.at(i)->getSpecialBuffer(); + outBuffers[i] = outputList.at(i)->specialBuffer(); tadShapes[i] = packZ.platformShapeInfo(); tadOffsets[i] = packZ.platformOffsets(); } @@ -152,24 +152,24 @@ namespace sd { auto dOutTadShapes = reinterpret_cast(pm.replicatePointer(tadShapes.data(), tadShapes.size() * sizeof(Nd4jLong *))); auto dOutTadOffsets = reinterpret_cast(pm.replicatePointer(tadOffsets.data(), tadOffsets.size() * sizeof(Nd4jLong *))); // run kernel on device - dynamicPartitionTadKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), shape::length(packX.primaryShapeInfo()), indices->getSpecialBuffer(), indices->getSpecialShapeInfo(), indices->lengthOf(), dOutBuffers, dOutTadShapes, dOutTadOffsets, outSize); + dynamicPartitionTadKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), shape::length(packX.primaryShapeInfo()), indices->specialBuffer(), indices->specialShapeInfo(), indices->lengthOf(), dOutBuffers, dOutTadShapes, dOutTadOffsets, outSize); } else { // linear case auto numThreads = 256; auto shmemSize = numThreads * sizeof(Y) * 2 + 1024; std::vector outBuffers; - std::vector outShapes; + std::vector outShapes; for (auto v:outputList) { - outBuffers.emplace_back(v->getSpecialBuffer()); - outShapes.emplace_back(v->getSpecialShapeInfo()); + outBuffers.emplace_back(v->specialBuffer()); + outShapes.emplace_back(v->specialShapeInfo()); } auto dOutBuffers = 
reinterpret_cast(pm.replicatePointer(outBuffers.data(), outBuffers.size() * sizeof(void *))); auto dOutShapes = reinterpret_cast(pm.replicatePointer(outShapes.data(), outShapes.size() * sizeof(Nd4jLong *))); - dynamicPartitionScalarKernel<<<256, numThreads, shmemSize, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), indices->getSpecialBuffer(), indices-> getSpecialShapeInfo(), dOutBuffers, dOutShapes, outSize); + dynamicPartitionScalarKernel<<<256, numThreads, shmemSize, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), dOutBuffers, dOutShapes, outSize); } pm.synchronize(); @@ -177,7 +177,7 @@ namespace sd { template - static _CUDA_G void dynamicStitchScalarKernel(void **vx, Nd4jLong **xShapeInfos, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, Nd4jLong *zShapeInfo, Nd4jLong zLength) { + static _CUDA_G void dynamicStitchScalarKernel(void **vx, Nd4jLong **xShapeInfos, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong zLength) { auto z = reinterpret_cast(vz); for (int e = blockIdx.x; e < inputSize; e += gridDim.x) { @@ -198,7 +198,7 @@ namespace sd { } template - static _CUDA_G void dynamicStitchTadKernel(void **vx, Nd4jLong **xTadShapeInfos, Nd4jLong **xTadOffsets, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + static _CUDA_G void dynamicStitchTadKernel(void **vx, Nd4jLong **xTadShapeInfos, Nd4jLong **xTadOffsets, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets) { auto bz = reinterpret_cast(vz); for (int e = blockIdx.x; e < inputSize; e += gridDim.x) { @@ -237,17 +237,17 @@ namespace sd { PointersManager pm(context, "dynamicStitch"); if (output->isVector()) { - std::vector inputBuffers(inputSize); - std::vector 
inputShapes(inputSize); - std::vector indicesBuffers(inputSize); - std::vector indicesShapes(inputSize); + std::vector inputBuffers(inputSize); + std::vector inputShapes(inputSize); + std::vector indicesBuffers(inputSize); + std::vector indicesShapes(inputSize); for (int e = 0; e < inputSize; e++) { - inputBuffers[e] = inputs.at(e)->getSpecialBuffer(); - indicesBuffers[e] = indices.at(e)->getSpecialBuffer(); + inputBuffers[e] = inputs.at(e)->specialBuffer(); + indicesBuffers[e] = indices.at(e)->specialBuffer(); - inputShapes[e] = inputs.at(e)->getSpecialShapeInfo(); - indicesShapes[e] = indices.at(e)->getSpecialShapeInfo(); + inputShapes[e] = inputs.at(e)->specialShapeInfo(); + indicesShapes[e] = indices.at(e)->specialShapeInfo(); } // copying pointers to buffers to device @@ -262,26 +262,26 @@ namespace sd { for (int i = restDims.size(); i > 0; i--) restDims[restDims.size() - i] = output->rankOf() - i; - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), restDims); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims); - std::vector inputBuffers(inputSize); - std::vector inputTadShapes(inputSize); - std::vector inputTadOffsets(inputSize); + std::vector inputBuffers(inputSize); + std::vector inputTadShapes(inputSize); + std::vector inputTadOffsets(inputSize); - std::vector indicesBuffers(inputSize); - std::vector indicesShapes(inputSize); + std::vector indicesBuffers(inputSize); + std::vector indicesShapes(inputSize); for (int e = 0; e < inputSize; e++) { std::vector sourceDims(inputs[e]->rankOf() - indices[e]->rankOf()); for (int i = sourceDims.size(); i > 0; i--) sourceDims[sourceDims.size() - i] = inputs[e]->rankOf() - i; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(inputs[e]->getShapeInfo(), sourceDims); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(inputs[e]->shapeInfo(), sourceDims); - indicesBuffers[e] = indices[e]->getSpecialBuffer(); - 
indicesShapes[e] = indices[e]->getSpecialShapeInfo(); + indicesBuffers[e] = indices[e]->specialBuffer(); + indicesShapes[e] = indices[e]->specialShapeInfo(); - inputBuffers[e] = inputs[e]->getSpecialBuffer(); + inputBuffers[e] = inputs[e]->specialBuffer(); inputTadShapes[e] = packX.platformShapeInfo(); inputTadOffsets[e] = packX.platformOffsets(); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu index 3a0ea9240..c5e8848cb 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu @@ -47,7 +47,7 @@ namespace helpers { // - outputOffsets - output TAD offsets // template - static __global__ void globalExtractPatchesKernel(bool theSame, int batchCount, int sizeRow, int sizeCol, int rowDim, int colDim, int outRowDim, int outColDim, int strideRow, int strideCol, int rateRow, int rateCol, int rowCast, int colCast, int lastDim, T* input, Nd4jLong* patchShape, Nd4jLong* inputOffsets, T* output, Nd4jLong* outTadShape, Nd4jLong* outputOffsets) { + static __global__ void globalExtractPatchesKernel(bool theSame, int batchCount, int sizeRow, int sizeCol, int rowDim, int colDim, int outRowDim, int outColDim, int strideRow, int strideCol, int rateRow, int rateCol, int rowCast, int colCast, int lastDim, const T* input, const Nd4jLong* patchShape, const Nd4jLong* inputOffsets, T* output, const Nd4jLong* outTadShape, const Nd4jLong* outputOffsets) { auto start = threadIdx.x + blockIdx.x * blockDim.x; @@ -114,8 +114,8 @@ namespace helpers { if (sizeCol * rateCol < 3) colCast = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(images->getShapeInfo(), restDims.data(), restDims.size()); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), restDims.data(), restDims.size()); + auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(images->shapeInfo(), restDims.data(), restDims.size()); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims.data(), restDims.size()); int batchCount = packX.numberOfTads(); PointersManager manager(context, "helpers::extractPatches"); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index 262b1fe3e..7fcd71dba 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -79,9 +79,11 @@ namespace helpers { } template - static __global__ void fakeQuantWithMinMaxKernel(T* input, Nd4jLong* inputShape, T* min, T* max, - int lowIntBound, int upperIntBound, Nd4jLong channels, - T* output, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void fakeQuantWithMinMaxKernel(const T* input, const Nd4jLong* inputShape, + T* min, T* max, + int lowIntBound, int upperIntBound, Nd4jLong channels, + T* output, const Nd4jLong* outputShape, + Nd4jLong length) { __shared__ int block; if (threadIdx.x == 0) { block = length / channels; // to loop with last dimension as block @@ -129,10 +131,6 @@ namespace helpers { void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (context, input, min, max, numBits, narrowed, output), FLOAT_TYPES); } - - BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVarsPerChannel_, (LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); - } } } diff 
--git a/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu b/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu index 3600104e1..aa2ff8297 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu @@ -25,7 +25,7 @@ namespace sd { namespace ops { namespace helpers { template - void _CUDA_G flattenKernel(void **xBuffers, Nd4jLong **xShapeInfos, Nd4jLong *offsets, Nd4jLong numInputs, void *zBuffer, Nd4jLong *zShapeInfo, char order) { + void _CUDA_G flattenKernel(void **xBuffers, Nd4jLong **xShapeInfos, Nd4jLong *offsets, Nd4jLong numInputs, void *zBuffer, const Nd4jLong *zShapeInfo, char order) { int xCoord[MAX_RANK]; @@ -47,9 +47,9 @@ namespace sd { void flatten_(sd::LaunchContext *context, std::vector &inputs, NDArray *output, char order) { PointersManager pm(context, "flatten"); - std::vector hdBuffers(inputs.size()); + std::vector hdBuffers(inputs.size()); std::vector hOffsets(inputs.size()); - std::vector hdShapes(inputs.size()); + std::vector hdShapes(inputs.size()); Nd4jLong cOffset = 0; // calculating offsets in output @@ -67,7 +67,7 @@ namespace sd { auto dOffsets = (Nd4jLong *) pm.replicatePointer(hOffsets.data(), inputs.size() * sizeof(Nd4jLong)); - flattenKernel<<<256, 512, 8192, *context->getCudaStream()>>>(dBuffers, dShapes, dOffsets, inputs.size(), output->getSpecialBuffer(), output->getSpecialShapeInfo(), order); + flattenKernel<<<256, 512, 8192, *context->getCudaStream()>>>(dBuffers, dShapes, dOffsets, inputs.size(), output->specialBuffer(), output->specialShapeInfo(), order); pm.synchronize(); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gather.cu b/libnd4j/include/ops/declarable/helpers/cuda/gather.cu index 03d2f35d8..26778aa63 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/gather.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/gather.cu @@ -161,13 +161,13 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in 
sizeof(Nd4jLong))); NDArray::prepareSpecialUse({output}, {input, pIndices}); - BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLauncher, (context->getCudaStream(), numOfSubArrs, input->getSpecialBuffer(), xShapeInfo, xOffsets, pIndices->getSpecialBuffer(), pIndices->getSpecialShapeInfo(), output->getSpecialBuffer(), zShapeInfo, zOffsets), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLauncher, (context->getCudaStream(), numOfSubArrs, input->specialBuffer(), xShapeInfo, xOffsets, pIndices->specialBuffer(), pIndices->specialShapeInfo(), output->specialBuffer(), zShapeInfo, zOffsets), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({output}, {input, pIndices}); manager.synchronize(); } else { NDArray::prepareSpecialUse({output}, {input, pIndices}); - BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLinear, (context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), pIndices->getSpecialBuffer(), pIndices->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLinear, (context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), pIndices->specialBuffer(), pIndices->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({output}, {input, pIndices}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu index 21ab1ff98..d72f3e1bc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu @@ -135,7 +135,7 @@ namespace sd { PointersManager manager(context, "gatherND"); NDArray::prepareSpecialUse({&output}, {&input, &indices}); - BUILD_DOUBLE_SELECTOR(xType, 
yType, gatherNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, gatherNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), indices.specialBuffer(), indices.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({&output}, {&input, &indices}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu b/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu index f88ec6003..e3fdd9411 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu @@ -25,9 +25,9 @@ namespace sd { namespace ops { namespace helpers { template - static _CUDA_G void _hammingKernel(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, void *reductionBuffer, Nd4jLong length) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + static _CUDA_G void _hammingKernel(const void *vx, const Nd4jLong *xShapeInfo, const void *vy, const Nd4jLong *yShapeInfo, void *vz, void *reductionBuffer, Nd4jLong length) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); __shared__ Nd4jLong *shared; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu index c6123d6da..6d7310fc0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu @@ -25,7 +25,7 @@ namespace sd { namespace ops { namespace helpers { template - void _CUDA_G histogramKernel(void *xBuffer, 
Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, X* min_val, X* max_val) { + void _CUDA_G histogramKernel(void *xBuffer, const Nd4jLong *xShapeInfo, void *zBuffer, const Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, X* min_val, X* max_val) { int tid = blockIdx.x * blockDim.x + threadIdx.x; auto dx = reinterpret_cast(xBuffer); auto result = reinterpret_cast(zBuffer); @@ -108,13 +108,13 @@ namespace sd { } template - static void histogram_(sd::LaunchContext *context, void *xBuffer, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, void* min_val, void* max_val) { + static void histogram_(sd::LaunchContext *context, void *xBuffer, const Nd4jLong *xShapeInfo, const Nd4jLong *dxShapeInfo, void *zBuffer, const Nd4jLong *zShapeInfo, Nd4jLong numBins, void* min_val, void* max_val) { int numThreads = 256; int numBlocks = sd::math::nd4j_max(256, sd::math::nd4j_min(1, shape::length(xShapeInfo) / numThreads)); int workspaceSize = numBlocks * numBins; auto tmp = NDArrayFactory::create('c', {workspaceSize}, context); - histogramKernel<<getCudaStream()>>>(xBuffer, dxShapeInfo, zBuffer, zShapeInfo, tmp.getSpecialBuffer(), context->getReductionPointer(), numBins, reinterpret_cast(min_val), reinterpret_cast(max_val)); + histogramKernel<<getCudaStream()>>>(xBuffer, dxShapeInfo, zBuffer, zShapeInfo, tmp.specialBuffer(), context->getReductionPointer(), numBins, reinterpret_cast(min_val), reinterpret_cast(max_val)); cudaStreamSynchronize(*context->getCudaStream()); } @@ -127,7 +127,7 @@ namespace sd { auto max_val = input.reduceNumber(reduce::SameOps::Max); // min_val.printIndexedBuffer("MIN"); // max_val.printIndexedBuffer("MAX"); - BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.shapeInfo(), input.specialShapeInfo(), output.getSpecialBuffer(), 
output.getSpecialShapeInfo(), numBins, min_val.specialBuffer(), max_val.specialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.shapeInfo(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numBins, min_val.specialBuffer(), max_val.specialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&output}, {&input}); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu index e39f9b438..adb5a3ec4 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu @@ -77,7 +77,7 @@ __host__ static void histogramFixedWidthCudaLauncher(const cudaStream_t *stream, const X leftEdge = range.e(0); const X rightEdge = range.e(1); - histogramFixedWidthCuda<<<256, 256, 1024, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); + histogramFixedWidthCuda<<<256, 256, 1024, *stream>>>(input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); } //////////////////////////////////////////////////////////////////////// @@ -164,8 +164,8 @@ void histogramFixedWidth(sd::LaunchContext* context, const NDArray& input, const // cudaError_t err = cudaMalloc(&outputBuffer, output.lengthOf() * sizeof(Nd4jLong)); // if (err != 0) // throw cuda_exception::build("helpers::histogramFixedWidth: Cannot allocate memory for output", err); -// copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.getSpecialBuffer(), output.getSpecialShapeInfo(), output.lengthOf()); -// histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), 
input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge); +// copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.specialBuffer(), output.specialShapeInfo(), output.lengthOf()); +// histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.specialBuffer(), input.specialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge); // returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf()); // //cudaSyncStream(*stream); // err = cudaFree(outputBuffer); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu b/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu index 0dbca8c47..08f5959e8 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu @@ -93,7 +93,7 @@ void im2col(sd::LaunchContext& context, const NDArray& image, NDArray& columns, const int blocksPerGrid = (columns.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({&columns}, {&image}); - BUILD_SINGLE_SELECTOR(columns.dataType(), im2colCudaLauncher, (blocksPerGrid, threadsPerBlock, context, image.getSpecialBuffer(), columns.getSpecialBuffer(), image.getSpecialShapeInfo(), columns.getSpecialShapeInfo(), sH, sW, pH, pW, dH, dW, arrZeroPadVal.e(0)), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(columns.dataType(), im2colCudaLauncher, (blocksPerGrid, threadsPerBlock, context, image.specialBuffer(), columns.specialBuffer(), image.specialShapeInfo(), columns.specialShapeInfo(), sH, sW, pH, pW, dH, dW, arrZeroPadVal.e(0)), FLOAT_TYPES); NDArray::registerSpecialUse({&columns}, {&image}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index 6d6ec95ed..47319f100 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -49,9 +49,12 @@ namespace helpers { } template - static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, float const* boxes, - Nd4jLong* boxesShape, float const* colorTable, Nd4jLong* colorTableShape, T* output, Nd4jLong* outputShape, - Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong boxSize, Nd4jLong colorTableLen) { + static __global__ void drawBoundingBoxesKernel(T const* images, const Nd4jLong* imagesShape, + float const* boxes, const Nd4jLong* boxesShape, + float const* colorTable, const Nd4jLong* colorTableShape, + T* output, const Nd4jLong* outputShape, + Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, + Nd4jLong channels, Nd4jLong boxSize, Nd4jLong colorTableLen) { for (auto batch = blockIdx.x; batch < (int)batchSize; batch += gridDim.x) { // loop by batch for (auto boxIndex = 0; boxIndex < boxSize; ++boxIndex) { @@ -153,8 +156,8 @@ namespace helpers { auto boxesBuf = boxes->getDataBuffer()->specialAsT(); // boxes should be float32 auto colorsTableBuf = colorsTable.getDataBuffer()->specialAsT(); // color table is float32 auto outputBuf = output->dataBuffer()->specialAsT(); - drawBoundingBoxesKernel<<<128, 128, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), - boxesBuf, boxes->getSpecialShapeInfo(), colorsTableBuf, colorsTable.getSpecialShapeInfo(), + drawBoundingBoxesKernel<<<128, 128, 1024, *stream>>>(imagesBuf, images->specialShapeInfo(), + boxesBuf, boxes->specialShapeInfo(), colorsTableBuf, colorsTable.specialShapeInfo(), outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, boxSize, colorsTable.lengthOf()); } @@ -171,7 +174,7 @@ namespace helpers { BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES); 
NDArray::registerSpecialUse({output}, {images, boxes, colors}); } - BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (sd::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* output), FLOAT_TYPES); + } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index 6a045bc8d..d483f87b3 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -128,7 +128,7 @@ namespace helpers { // template static __global__ void resizeImageKernel(T const* input, Nd4jLong const* inputShape, Z* outputYptr, - Nd4jLong* outputShape, Nd4jLong batchSize, Nd4jLong outWidth, Nd4jLong outHeight, Nd4jLong channels, + Nd4jLong const* outputShape, Nd4jLong batchSize, Nd4jLong outWidth, Nd4jLong outHeight, Nd4jLong channels, Nd4jLong inRowSize, Nd4jLong outRowSize, Nd4jLong inBatchNumValues, BilinearInterpolationData* xs_, BilinearInterpolationData* ys_) { @@ -171,11 +171,11 @@ namespace helpers { Nd4jLong inBatchNumValues = inHeight * inRowSize; Nd4jLong outRowSize = outWidth * channels; auto stream = context->getCudaStream(); - T const* pInput = images->getDataBuffer()->specialAsT(); //reinterpret_cast(images->getSpecialBuffer()); // this works only with 'c' direction + T const* pInput = images->getDataBuffer()->specialAsT(); //reinterpret_cast(images->specialBuffer()); // this works only with 'c' direction F* pOutput = output->dataBuffer()->specialAsT();//reinterpret_cast(output->specialBuffer()); dim3 batchSizeBlock(batchSize, 1, 1); dim3 pictureBlock(outHeight, outWidth, channels); - resizeImageKernel<<<256, 256, 256, *stream>>>(pInput, images->getSpecialShapeInfo(), pOutput, + resizeImageKernel<<<256, 256, 256, *stream>>>(pInput, images->specialShapeInfo(), pOutput, output->specialShapeInfo(), batchSize, outWidth, outHeight, channels, inRowSize, outRowSize, 
inBatchNumValues, xs_, ys_); @@ -255,7 +255,7 @@ namespace helpers { // resize by interpolation nearest neighbor algorithm kernel // template - static __global__ void resizeNeighborKernel(T const* input, Nd4jLong* inputShape, T* output, Nd4jLong* outputShape, + static __global__ void resizeNeighborKernel(T const* input, Nd4jLong const* inputShape, T* output, Nd4jLong const* outputShape, Nd4jLong batchSize, Nd4jLong inWidth, Nd4jLong inHeight, Nd4jLong outWidth, Nd4jLong outHeight, Nd4jLong channels, double widthScale, double heightScale, bool alignCorners, bool halfPixelCenters) { //for (int b = blockIdx.x; b < batchSize; b += gridDim.x) @@ -325,12 +325,12 @@ namespace helpers { float heightScale = calculateResizeScale(inHeight, outHeight, alignCorners); float widthScale = calculateResizeScale(inWidth, outWidth, alignCorners); - auto imagesBuffer = images->getDataBuffer()->specialAsT();//reinterpret_cast(images->getSpecialBuffer()); + auto imagesBuffer = images->getDataBuffer()->specialAsT();//reinterpret_cast(images->specialBuffer()); auto outputBuffer = output->dataBuffer()->specialAsT();//reinterpret_cast(output->specialBuffer()); auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {images}); - resizeNeighborKernel<<>>(imagesBuffer, images->getSpecialShapeInfo(), outputBuffer, output->specialShapeInfo(), + resizeNeighborKernel<<>>(imagesBuffer, images->specialShapeInfo(), outputBuffer, output->specialShapeInfo(), batchSize, inWidth, inHeight, outWidth, outHeight, channels, widthScale, heightScale, alignCorners, halfPixelCenters); NDArray::registerSpecialUse({output}, {images}); @@ -1055,7 +1055,7 @@ namespace helpers { template static __global__ void resizeAreaKernel(ImageResizerState const* pSt, CachedInterpolation const* caches, float scale, - T const* inputPtr, Nd4jLong* inputShape, float* outputPtr, Nd4jLong* outputShape, ScaleCache* cachePool) { //batch * outWidth * outHeight + T const* inputPtr, Nd4jLong const* inputShape, float* 
outputPtr, Nd4jLong const* outputShape, ScaleCache* cachePool) { //batch * outWidth * outHeight for (auto batch = blockIdx.x; batch < pSt->batchSize; batch += gridDim.x) { for (auto y = threadIdx.x; y < pSt->outHeight; y += blockDim.x) { @@ -1106,7 +1106,7 @@ namespace helpers { static void resizeArea(cudaStream_t* stream, ImageResizerState const& st, CachedInterpolation* cache, NDArray const* input, NDArray* output) { - T const* inputPtr = reinterpret_cast(input->getSpecialBuffer()); + T const* inputPtr = reinterpret_cast(input->specialBuffer()); // float* yScales; // T const** yPtrs; float scale = 1.f / (st.heightScale * st.widthScale); @@ -1116,7 +1116,7 @@ namespace helpers { err = cudaMemcpyAsync(pSt, &st, sizeof(ImageResizerState), cudaMemcpyHostToDevice, *stream); ScaleCache* cachePool; err = cudaMalloc(&cachePool, sizeof(ScaleCache) * st.batchSize * st.outWidth * st.outHeight); - resizeAreaKernel<<<128, 2, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->getSpecialShapeInfo(), outputPtr, + resizeAreaKernel<<<128, 2, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->specialShapeInfo(), outputPtr, output->specialShapeInfo(), cachePool); err = cudaStreamSynchronize(*stream); err = cudaFree(cachePool); @@ -1197,9 +1197,9 @@ namespace helpers { // cropAndResize kernel type of input(images) and output should be the same // template - static __global__ void cropAndResizeKernel(T const *images, Nd4jLong* imagesShape, Z const* boxes, Nd4jLong* boxesShape, - I const* indices, Nd4jLong* indexShape, I const* cropSize, Nd4jLong* cropShape, int method, - double extrapolationVal, T* output, Nd4jLong* outputShape, int numBoxes, int cropHeight, int cropWidth, + static __global__ void cropAndResizeKernel(T const *images, Nd4jLong const* imagesShape, Z const* boxes, Nd4jLong const* boxesShape, + I const* indices, Nd4jLong const* indexShape, I const* cropSize, Nd4jLong const* cropShape, int method, + double extrapolationVal, T* output, Nd4jLong const* outputShape, int 
numBoxes, int cropHeight, int cropWidth, int batchSize, int imageHeight, int imageWidth, int depth) { for (int b = blockIdx.x; b < numBoxes; b += gridDim.x) @@ -1337,10 +1337,10 @@ namespace helpers { const int cropWidth = crops->sizeAt(2); const int depth = crops->sizeAt(3); auto stream = context->getCudaStream(); - T const* imagesBuf = reinterpret_cast(images->getSpecialBuffer()); - Z const* boxesBuf = reinterpret_cast(boxes->getSpecialBuffer()); - I const* indexBuf = reinterpret_cast(indices->getSpecialBuffer()); - I const* cropSizes = reinterpret_cast(cropSize->getSpecialBuffer()); + T const* imagesBuf = reinterpret_cast(images->specialBuffer()); + Z const* boxesBuf = reinterpret_cast(boxes->specialBuffer()); + I const* indexBuf = reinterpret_cast(indices->specialBuffer()); + I const* cropSizes = reinterpret_cast(cropSize->specialBuffer()); T* outBuf = reinterpret_cast(crops->specialBuffer()); int threadsPerBlock = math::nd4j_max(imageHeight * imageWidth, cropHeight * cropWidth); @@ -1348,8 +1348,8 @@ namespace helpers { threadsPerBlock = MAX_NUM_THREADS/4; NDArray::prepareSpecialUse({crops}, {images, boxes, indices, cropSize}); - cropAndResizeKernel<<>>(imagesBuf, images->getSpecialShapeInfo(), boxesBuf, boxes->getSpecialShapeInfo(), indexBuf, indices->getSpecialShapeInfo(), - cropSizes, cropSize->getSpecialShapeInfo(), method, extrapolationVal, outBuf, crops->specialShapeInfo(), numBoxes, cropHeight, cropWidth, batchSize, imageHeight, imageWidth, depth); + cropAndResizeKernel<<>>(imagesBuf, images->specialShapeInfo(), boxesBuf, boxes->specialShapeInfo(), indexBuf, indices->specialShapeInfo(), + cropSizes, cropSize->specialShapeInfo(), method, extrapolationVal, outBuf, crops->specialShapeInfo(), numBoxes, cropHeight, cropWidth, batchSize, imageHeight, imageWidth, depth); NDArray::registerSpecialUse({crops}, {images, boxes, indices, cropSize}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu index e6d9a27b1..8b7e8ee57 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu @@ -38,7 +38,7 @@ namespace helpers { // return value: true, if threshold is overcome, false otherwise // template - static __device__ bool needToSuppressWithThreshold(T* boxes, Nd4jLong* boxesShape, int previousIndex, int nextIndex, T threshold) { + static __device__ bool needToSuppressWithThreshold(T* boxes, Nd4jLong const* boxesShape, int previousIndex, int nextIndex, T threshold) { Nd4jLong previous0[] = {previousIndex, 0}; Nd4jLong previous1[] = {previousIndex, 1}; Nd4jLong previous2[] = {previousIndex, 2}; @@ -80,7 +80,7 @@ namespace helpers { } template - static __device__ T similirityV3(T* boxes, Nd4jLong* boxesShape, int previousIndex, int nextIndex) { + static __device__ T similirityV3(T* boxes, Nd4jLong const* boxesShape, int previousIndex, int nextIndex) { Nd4jLong previous0[] = {previousIndex, 0}; Nd4jLong previous1[] = {previousIndex, 1}; Nd4jLong previous2[] = {previousIndex, 2}; @@ -127,7 +127,7 @@ namespace helpers { // we compute boolean flag as shared uint32 and return it on final only for the first thread // template - static __global__ void shouldSelectKernel(T* boxesBuf, Nd4jLong* boxesShape, I* indexBuf, I* selectedIndicesData, double threshold, int numSelected, int i, bool* shouldSelect) { + static __global__ void shouldSelectKernel(T* boxesBuf, Nd4jLong const* boxesShape, I* indexBuf, I* selectedIndicesData, double threshold, int numSelected, int i, bool* shouldSelect) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = gridDim.x * blockDim.x; __shared__ unsigned int shouldSelectShared; @@ -242,7 +242,7 @@ namespace helpers { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __device__ bool 
checkOverlapBoxes(T* boxes, Nd4jLong* shape, T* scores, I* indices, I* selectedIndices, I* startIndices, I selectedSize, I nextCandidateIndex, T overlapThreshold, T scoreThreshold, bool simple) { + static __device__ bool checkOverlapBoxes(T* boxes, Nd4jLong const* shape, T* scores, I* indices, I* selectedIndices, I* startIndices, I selectedSize, I nextCandidateIndex, T overlapThreshold, T scoreThreshold, bool simple) { bool shouldHardSuppress = false; T& nextCandidateScore = scores[nextCandidateIndex]; I selectedIndex = indices[nextCandidateIndex]; @@ -276,8 +276,8 @@ namespace helpers { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template static __global__ void - suppressNonMaxOverlapKernel(T* boxes, Nd4jLong* boxesShape, T* scoresData, I* indices, I* startIndices, Nd4jLong length, I maxOutputLen, - T overlapThreshold, T scoreThreshold, I* output, Nd4jLong* outputShape, I* outputLength, bool simple) { + suppressNonMaxOverlapKernel(T* boxes, Nd4jLong const* boxesShape, T* scoresData, I* indices, I* startIndices, Nd4jLong length, I maxOutputLen, + T overlapThreshold, T scoreThreshold, I* output, Nd4jLong const* outputShape, I* outputLength, bool simple) { __shared__ I selectedSize; __shared__ I* tempOutput; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu index 54f306ef7..c26b79ee6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu @@ -69,8 +69,8 @@ linkage void rgbToYuvCudaLauncher(const int blocksPerGrid, const int threadsPerB /////////////////////////////////////////////////////////////////// void transformRgbYuv(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), { dimC }); - auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), { dimC }); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC }); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC }); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -80,7 +80,7 @@ void transformRgbYuv(sd::LaunchContext* context, const NDArray& input, NDArray& PointersManager manager(context, "yuv_to_rgb"); NDArray::prepareSpecialUse({ &output }, { &input }); - BUILD_SINGLE_SELECTOR(input.dataType(), rgbToYuvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), rgbToYuvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({ &output }, { &input }); manager.synchronize(); @@ -124,8 +124,8 @@ linkage void yuvToRgbCudaLauncher(const int blocksPerGrid, const int threadsPerB /////////////////////////////////////////////////////////////////// void transformYuvRgb(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), { dimC }); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), { dimC }); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC }); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC }); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -135,7 +135,7 @@ void 
transformYuvRgb(sd::LaunchContext* context, const NDArray& input, NDArray& PointersManager manager(context, "yuv_to_rgb"); NDArray::prepareSpecialUse({ &output }, { &input }); - BUILD_SINGLE_SELECTOR(input.dataType(), yuvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), yuvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({ &output }, { &input }); manager.synchronize(); @@ -200,7 +200,7 @@ void transformRgbGrs(sd::LaunchContext* context, const NDArray& input, NDArray& const int sharedMem = input.rankOf() * sizeof(int) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), rgbToGrsCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), dimC), NUMERIC_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), rgbToGrsCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), dimC), NUMERIC_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); @@ -287,8 +287,8 @@ static _CUDA_H void rgbToHsvCudaLauncher(const int blocksPerGrid, const int thre /////////////////////////////////////////////////////////////////// void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -298,7 +298,7 @@ void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* PointersManager manager(context, "hsv_to_rgb"); NDArray::prepareSpecialUse({output}, {input}); - BUILD_SINGLE_SELECTOR(input->dataType(), hsvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), hsvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input}); manager.synchronize(); @@ -306,8 +306,8 @@ void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* /////////////////////////////////////////////////////////////////// void transformRgbHsv(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), 
{dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -317,7 +317,7 @@ void transformRgbHsv(sd::LaunchContext* context, const NDArray* input, NDArray* PointersManager manager(context, "rgb_to_hsv"); NDArray::prepareSpecialUse({output}, {input}); - BUILD_SINGLE_SELECTOR(input->dataType(), rgbToHsvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), rgbToHsvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input}); manager.synchronize(); @@ -389,21 +389,21 @@ __global__ void tripleTransformerCuda(const void *vx, const Nd4jLong *xShapeInfo template static void rgbYiq(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); NDArray::prepareSpecialUse({output}, {input}); - return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 1, packZ.numberOfTads()); + return tripleTransformerCuda<<<256, 256, 8192, 
*context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 1, packZ.numberOfTads()); NDArray::registerSpecialUse({output}, {input}); } template FORCEINLINE static void yiqRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); NDArray::prepareSpecialUse({output}, {input}); - return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 2, packZ.numberOfTads()); + return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 2, packZ.numberOfTads()); NDArray::registerSpecialUse({output}, {input}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu index 27f4f35f2..723b0f215 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu @@ -61,7 +61,7 @@ static void ismax_(sd::LaunchContext * context, const NDArray* input, NDArray* o int dimensionLength = dimensions.size(); 
std::vector copy(dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), copy.data(), copy.size()); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), copy.data(), copy.size()); // we launch legacy IndexMax op, to get indices of max values along dimension auto indexMaxArr = input->applyIndexReduce(indexreduce::IndexMax, dimensions); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu index 57bb205a9..ebc0732e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu @@ -27,7 +27,7 @@ namespace ops { namespace helpers { template - static _CUDA_G void lrnKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { + static _CUDA_G void lrnKernel(void *vx, Nd4jLong const*xTadShapeInfo, Nd4jLong const*xTadOffsets, void *vz, Nd4jLong const*zTadShapeInfo, Nd4jLong const*zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { extern __shared__ char sharedChar[]; T* shared = reinterpret_cast(sharedChar); @@ -63,7 +63,7 @@ namespace helpers { } template - static _CUDA_G void lrnBPKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { + static _CUDA_G void lrnBPKernel(void const* vx, Nd4jLong const* xTadShapeInfo, Nd4jLong const* xTadOffsets, void *vz, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { extern __shared__ char sharedChar[]; X* sharedX = reinterpret_cast(sharedChar); Z* sharedY = 
reinterpret_cast(sharedX + blockDim.x); @@ -82,7 +82,7 @@ namespace helpers { for (uint i = blockIdx.x; i < numTads; i += gridDim.x) { - auto x = reinterpret_cast(vx) + xTadOffsets[i]; + auto x = reinterpret_cast(vx) + xTadOffsets[i]; auto z = reinterpret_cast(vz) + zTadOffsets[i]; const uint begin = sd::math::nd4j_max(0, threadIdx.x - depth); @@ -116,8 +116,8 @@ namespace helpers { template static void lrnBP_(sd::graph::Context& block, const NDArray& input, const NDArray& gradO, NDArray& gradI, const int depth, const float bias, const float alpha, const float beta) { auto rank = input.rankOf(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), {rank - 1}); - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.getShapeInfo(), {rank - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1}); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1}); const auto tadLength = shape::length(packX.primaryShapeInfo()); const int numBlocks = sd::math::nd4j_min(1024, packX.numberOfTads()); @@ -126,7 +126,7 @@ namespace helpers { if (tadLength > 1024 || tadLength < 1) throw std::runtime_error("LRN: tadLength > 1024 isn't implemented yet"); - lrnBPKernel<<getCudaStream()>>>(input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradI.specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), packX.numberOfTads(), tadLength, depth, bias, alpha, beta); + lrnBPKernel<<getCudaStream()>>>(input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradI.specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), packX.numberOfTads(), tadLength, depth, bias, alpha, beta); gradI.tickWriteDevice(); gradI *= gradO; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu index a3b029c0b..8d8548be5 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu @@ -33,7 +33,7 @@ namespace ops { namespace helpers { template - static __global__ void fillRegularizerKernel(T* ioMatrixData, Nd4jLong* ioMatrixShape, Nd4jLong* ioMatrixTads, Nd4jLong* ioMatrixOffsets, Nd4jLong batchSize, Nd4jLong rows, T const value) { + static __global__ void fillRegularizerKernel(T* ioMatrixData, const Nd4jLong* ioMatrixShape, const Nd4jLong* ioMatrixTads, const Nd4jLong* ioMatrixOffsets, Nd4jLong batchSize, Nd4jLong rows, T const value) { for (auto x = blockIdx.x; x < batchSize; x += gridDim.x) { auto z = ioMatrixData + ioMatrixOffsets[x]; @@ -61,7 +61,7 @@ namespace helpers { if (fast) { // Cholesky decomposition approach // Equation for solve A^T * Ax = A^T * b, so // 1. Computing A2: - auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->getShapeInfo(), leftInput->getShapeInfo(), true, false); + auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->shapeInfo(), leftInput->shapeInfo(), true, false); //tAtShape[tAtShape.size() - 2] = output->sizeAt(-2); NDArray leftOutput(leftInput->ordering(), tAtShape, output->dataType(), context); MmulHelper::matmul(leftInput, leftInput, &leftOutput, true, false); // Computing A2 = A^T * A diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu index c986260e8..682b2eee9 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu @@ -37,9 +37,9 @@ namespace helpers { // invert the second diagonal for lower diagonal matrix template static __global__ void - invertKernelLow(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { - T* inverted = reinterpret_cast(invertedBuf); - T* input = reinterpret_cast(inputBuf); + invertKernelLow(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong 
*inputShape, Nd4jLong n) { + auto inverted = reinterpret_cast(invertedBuf); + auto input = reinterpret_cast(inputBuf); auto start = threadIdx.x + blockIdx.x * blockDim.x; auto step = blockDim.x * gridDim.x; @@ -61,9 +61,9 @@ namespace helpers { // invert diagonal vals to upper diagonal matrix template static __global__ void - upvertKernel(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { - T *inverted = reinterpret_cast(invertedBuf); - T *input = reinterpret_cast(inputBuf); + upvertKernel(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { + auto inverted = reinterpret_cast(invertedBuf); + auto input = reinterpret_cast(inputBuf); auto start = threadIdx.x + blockIdx.x * blockDim.x; auto step = blockDim.x * gridDim.x; @@ -72,7 +72,7 @@ namespace helpers { Nd4jLong pos[] = {i, i}; auto xIndex = shape::getOffset(inputShape, pos); auto zIndex = shape::getOffset(invertedShape, pos); -// math::atomics::nd4j_atomicDiv(&inverted[zIndex], input[xIndex]); + // invert diagonal elements inverted[zIndex] /= input[xIndex]; } @@ -82,13 +82,13 @@ namespace helpers { // invert upper second diagonal template static __global__ void - upvertKernelUp(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { + upvertKernelUp(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { __shared__ T* inverted; - __shared__ T* input; + __shared__ const T* input; if (threadIdx.x == 0) { inverted = reinterpret_cast(invertedBuf); - input = reinterpret_cast(inputBuf); + input = reinterpret_cast(inputBuf); } __syncthreads(); @@ -110,15 +110,11 @@ namespace helpers { // ------------------------------------------------------------------------------------------------------------------ // template static __global__ void - invertLowKernel(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, 
Nd4jLong *inputShape, Nd4jLong n) { + invertLowKernel(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { + + auto input = reinterpret_cast(inputBuf); + auto inverted = reinterpret_cast(invertedBuf); - T *inverted = reinterpret_cast(invertedBuf); - T *input = reinterpret_cast(inputBuf); - if (threadIdx.x == 0) { - inverted = reinterpret_cast(invertedBuf); - input = reinterpret_cast(inputBuf); - } - __syncthreads(); auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = gridDim.x * blockDim.x; @@ -145,15 +141,14 @@ namespace helpers { // Invertion of upper triangular matrix non-diagonal elements when main and second diagonals already processed template static __global__ void - invertUpKernel(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { - __shared__ T* inverted; - __shared__ T* input; + invertUpKernel( + void *invertedBuf, const Nd4jLong *invertedShape, + const void *inputBuf, const Nd4jLong *inputShape, + Nd4jLong n) { + + auto inverted = reinterpret_cast(invertedBuf);; + auto input = reinterpret_cast(inputBuf); - if (threadIdx.x == 0) { - inverted = reinterpret_cast(invertedBuf);; - input = reinterpret_cast(inputBuf); - } - __syncthreads(); auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -264,15 +259,15 @@ namespace helpers { // output - a N-D tensor buffer with rank not less than 2, input - 2D square n x n matrix with n = rowLen template static __global__ void - fillMatrix(void *output, Nd4jLong *outShape, void *input, Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { + fillMatrix(void *output, const Nd4jLong *outShape, const void *input, const Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { __shared__ F *matrix; - __shared__ T *inputBuf; + __shared__ const T *inputBuf; __shared__ Nd4jLong inputLen; __shared__ Nd4jLong n2; if (threadIdx.x == 0) { matrix = reinterpret_cast(output); - inputBuf = 
reinterpret_cast(input); + inputBuf = reinterpret_cast(input); inputLen = shape::length(inputShape); n2 = rowLen * rowLen; } @@ -291,15 +286,14 @@ namespace helpers { // same as above, but without type conversion template static __global__ void - returnMatrix(void *output, Nd4jLong *outputShape, void *input, Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { - __shared__ T* matrix; - __shared__ T* outputBuf; + returnMatrix(void *output, const Nd4jLong *outputShape, const void *input, const Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { __shared__ Nd4jLong outputLen; __shared__ Nd4jLong n2; + auto matrix = reinterpret_cast(input); + auto outputBuf = reinterpret_cast(output); if (threadIdx.x == 0) { - matrix = reinterpret_cast(input); - outputBuf = reinterpret_cast(output); + outputLen = shape::length(inputShape); n2 = rowLen * rowLen; } @@ -316,7 +310,7 @@ namespace helpers { // ------------------------------------------------------------------------------------------------------------------ // // fill up permutaion matrix kernel. 
Permutation matrix filled with zeros and ones template - static __global__ void fillUpPermutation(void *output, Nd4jLong *shape, int *source, int rowNum) { + static __global__ void fillUpPermutation(void *output, const Nd4jLong *shape, int *source, int rowNum) { F *permutation = reinterpret_cast(output); auto start = blockIdx.x * blockDim.x + threadIdx.x; @@ -515,7 +509,7 @@ namespace helpers { BUILD_DOUBLE_TEMPLATE(template void lup_,(LaunchContext * context, NDArray * input, NDArray * output, NDArray * permutation), FLOAT_NATIVE, INDEXING_TYPES); template - static __device__ void swapRows(T* matrix, Nd4jLong* shape, Nd4jLong theFirst, Nd4jLong theSecond, Nd4jLong n) { + static __device__ void swapRows(T* matrix, const Nd4jLong* shape, Nd4jLong theFirst, Nd4jLong theSecond, Nd4jLong n) { if (theFirst != theSecond) { for (auto i = 0; i < n; i++) { Nd4jLong theFirstPos[] = {theFirst, i}; @@ -528,7 +522,7 @@ namespace helpers { } template - static __device__ void processColumns(Nd4jLong currentRow, Nd4jLong rowNum, T* compoundBuf, Nd4jLong* compoundShape) { + static __device__ void processColumns(Nd4jLong currentRow, Nd4jLong rowNum, T* compoundBuf, const Nd4jLong* compoundShape) { Nd4jLong xDiag[] = {currentRow, currentRow}; auto diagIndex = shape::getOffset(compoundShape, xDiag, 0); for (auto j = currentRow + 1; j < rowNum; j++) { @@ -546,7 +540,7 @@ namespace helpers { } template - __device__ Nd4jLong argmaxCol(Nd4jLong column, T* compoundBuffer, Nd4jLong* compoundShape) { + __device__ Nd4jLong argmaxCol(Nd4jLong column, T* compoundBuffer, const Nd4jLong* compoundShape) { auto rowNum = shape::sizeAt(compoundShape, 0); Nd4jLong xInitial[] = {column, column}; auto xInitialIndex = shape::getOffset(compoundShape, xInitial, 0); @@ -565,7 +559,7 @@ namespace helpers { } template - static __device__ int luNN(T* matrix, Nd4jLong* shape, I* permutation, Nd4jLong* permuShape, Nd4jLong n) { + static __device__ int luNN(T* matrix, const Nd4jLong* shape, I* permutation, const 
Nd4jLong* permuShape, Nd4jLong n) { for (auto i = 0; i < n - 1; i++) { auto pivotIndex = argmaxCol(i, matrix, shape); @@ -581,9 +575,12 @@ namespace helpers { } template - static __global__ void luBatchedKernel(T* outputBuf, Nd4jLong* outputShape, I* permutations, Nd4jLong* permuShape, - Nd4jLong* outputTadShape, Nd4jLong* outputTadOffsets, Nd4jLong* permuTadShape, Nd4jLong* permuTadOffsets, - Nd4jLong batchNum) { + static __global__ void luBatchedKernel( + T* outputBuf, const Nd4jLong* outputShape, + I* permutations, const Nd4jLong* permuShape, + const Nd4jLong* outputTadShape, const Nd4jLong* outputTadOffsets, + const Nd4jLong* permuTadShape, const Nd4jLong* permuTadOffsets, + Nd4jLong batchNum) { auto start = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -627,7 +624,7 @@ namespace helpers { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1}); // DataType dtype = input->dataType(); // if (dtype != DataType::DOUBLE) @@ -651,8 +648,7 @@ namespace helpers { auto inputBuf = reinterpret_cast(matrix.specialBuffer()); auto outputBuf = reinterpret_cast(output->specialBuffer()) + offset; // if (matrix.dataType() == input->dataType()) - determinantKernel << < launchDims.x, launchDims.y, launchDims.z, *stream >> > - (inputBuf, outputBuf, n); + determinantKernel<<< launchDims.x, launchDims.y, launchDims.z, *stream>>>(inputBuf, outputBuf, n); // else // determinantKernel<<>> (inputBuf, outputBuf, n); } @@ -672,7 +668,7 @@ namespace helpers { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); - auto packX = 
ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1}); DataType dtype = input->dataType(); if (dtype != DataType::DOUBLE) @@ -718,8 +714,11 @@ namespace helpers { template static __global__ void - fillLowerUpperKernel(void *lowerBuf, Nd4jLong *lowerShape, void *upperBuf, Nd4jLong *upperShape, - void *matrixBuf, Nd4jLong *matrixShape, Nd4jLong n) { + fillLowerUpperKernel( + void *lowerBuf, const Nd4jLong *lowerShape, + void *upperBuf, const Nd4jLong *upperShape, + void *matrixBuf, const Nd4jLong *matrixShape, + Nd4jLong n) { __shared__ T *lowerMatrix; __shared__ T *upperMatrix; @@ -760,10 +759,10 @@ namespace helpers { NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, context); NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, context); NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, context); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 2, output->rankOf() - 1}); auto stream = context->getCudaStream(); @@ -792,7 +791,7 @@ namespace helpers { sd::MmulHelper::mmul(&matrix, &compound, &upper, 1.0, 0.0); upper.tickWriteDevice(); // upper.printIndexedBuffer("Full inverted"); - returnMatrix <<<1, n2, 1024, *stream>>>(output->specialBuffer(), output->specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), i * n2, n); + returnMatrix<<<1, n2, 
1024, *stream>>>(output->specialBuffer(), output->specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), i * n2, n); } return Status::OK(); } @@ -808,7 +807,7 @@ namespace helpers { } template - __global__ void fillBatchKernel(F **dArrayBatch, F *buf, Nd4jLong *offsets, Nd4jLong batchSize) { + __global__ void fillBatchKernel(F **dArrayBatch, F *buf, const Nd4jLong *offsets, Nd4jLong batchSize) { auto start = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -819,7 +818,7 @@ namespace helpers { template __global__ void - adjustResultsKernel(F *dArray, Nd4jLong *shape, Nd4jLong *offsets, Nd4jLong batchSize, Nd4jLong n) { + adjustResultsKernel(F *dArray, const Nd4jLong *shape, const Nd4jLong *offsets, Nd4jLong batchSize, Nd4jLong n) { //auto i = blockIdx.x * blockDim.x + threadIdx.x; Nd4jLong *shapeOf = shape::shapeOf(shape); Nd4jLong *strideOf = shape::stride(shape); @@ -850,7 +849,7 @@ namespace helpers { throw cuda_exception::build("helpers::cholesky_: Cannot create solver handle", status); } F **dArrayBatch = nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.getShapeInfo(), + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(), {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1}); const Nd4jLong batchSize = packX.numberOfTads(); @@ -865,8 +864,7 @@ namespace helpers { throw cuda_exception::build("helpers::cholesky_: Cannot allocate memory for solver errors buffer", err); } auto stream = context->getCudaStream(); - fillBatchKernel << < 1, batchSize, 128, *stream >> > - (dArrayBatch, reinterpret_cast(tempOutput.specialBuffer()), packX.specialOffsets(), batchSize); + fillBatchKernel<<<1, batchSize, 128, *stream>>>(dArrayBatch, reinterpret_cast(tempOutput.specialBuffer()), packX.specialOffsets(), batchSize); status = cusolverDnSetStream(handle, *stream); if (CUSOLVER_STATUS_SUCCESS != status) { @@ -895,8 +893,7 @@ namespace helpers { if 
(CUSOLVER_STATUS_SUCCESS != status) { throw cuda_exception::build("helpers::cholesky_: Cholesky factorization failed for batch", status); } - adjustResultsKernel << < batchSize, n2, 128, *stream >> > - (reinterpret_cast(tempOutput.specialBuffer()), packX.specialShapeInfo(), packX.specialOffsets(), batchSize, n); + adjustResultsKernel<<>>(reinterpret_cast(tempOutput.specialBuffer()), packX.specialShapeInfo(), packX.specialOffsets(), batchSize, n); err = cudaFree(dArrayBatch); if (err) { @@ -944,9 +941,11 @@ namespace helpers { FLOAT_NATIVE); template - __global__ void - logDetKernel(T *inputBuf, Nd4jLong *inputShape, Nd4jLong batchNum, Nd4jLong *tadShape, Nd4jLong *tadOffsets, - T *outputBuf, Nd4jLong *outputShape) { + __global__ void logDetKernel( + const T *inputBuf, const Nd4jLong *inputShape, + Nd4jLong batchNum, + const Nd4jLong *tadShape, const Nd4jLong *tadOffsets, + T *outputBuf, const Nd4jLong *outputShape) { __shared__ int n; if (threadIdx.x == 0) { @@ -954,11 +953,11 @@ namespace helpers { } __syncthreads(); - T *output = outputBuf; - T *input = inputBuf; + auto output = outputBuf; + auto input = inputBuf; for (auto i = blockIdx.x; i < batchNum; i += gridDim.x) { - T *current = input + tadOffsets[i]; + auto current = input + tadOffsets[i]; auto zIndex = shape::getIndexOffset(i, outputShape); for (auto e = threadIdx.x; e < n; e += blockDim.x) { @@ -981,10 +980,10 @@ namespace helpers { auto outputBuf = output->dataBuffer()->specialAsT(); //reinterpret_cast(output->specialBuffer()); // + e * n2; // + e * n2; auto inputBuf = tempOutput.dataBuffer()->specialAsT(); //reinterpret_cast(tempOutput.specialBuffer()); output->nullify(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.getShapeInfo(), + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(), {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1}); - logDetKernel <<<128, 512, 256, *stream>>>(inputBuf, tempOutput.specialShapeInfo(), + 
logDetKernel<<<128, 512, 256, *stream>>>(inputBuf, tempOutput.specialShapeInfo(), packX.numberOfTads(), packX.specialShapeInfo(), packX.specialOffsets(), outputBuf, output->specialShapeInfo()); output->tickWriteDevice(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu index e5773abf5..97124c3db 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu @@ -91,7 +91,7 @@ void matrixSetDiag(sd::LaunchContext* context, const NDArray& input, const NDArr PointersManager manager(context, "matrixSetDiag"); NDArray::prepareSpecialUse({&output}, {&input, &diagonal}); - BUILD_SINGLE_SELECTOR(input.dataType(), matrixSetDiagCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), diagonal.getSpecialBuffer(), diagonal.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), zeroPad), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), matrixSetDiagCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), diagonal.specialBuffer(), diagonal.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), zeroPad), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&input, &diagonal}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu index 3c1305391..78249bc38 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu @@ -43,9 +43,13 @@ namespace helpers { // inputLength - input subarray length // template - static __global__ void matrixBandKernel(void* inputBuffer, Nd4jLong* inputShape, - void* outputBuffer, Nd4jLong* outputShape, Nd4jLong 
lowerBand, Nd4jLong upperBand, Nd4jLong* tadOnlyInputShapeInfo, Nd4jLong* tadInputOffsets, - Nd4jLong* tadOnlyOutputShapeInfo, Nd4jLong* tadOutputOffsets, Nd4jLong numTads, Nd4jLong inputLength) { + static __global__ void matrixBandKernel(const void* inputBuffer, const Nd4jLong* inputShape, + void* outputBuffer, const Nd4jLong* outputShape, + Nd4jLong lowerBand, Nd4jLong upperBand, + const Nd4jLong* tadOnlyInputShapeInfo, const Nd4jLong* tadInputOffsets, + const Nd4jLong* tadOnlyOutputShapeInfo, const Nd4jLong* tadOutputOffsets, + Nd4jLong numTads, + Nd4jLong inputLength) { int totalThreads = blockDim.x; Nd4jLong rows = shape::sizeAt(inputShape, -2); Nd4jLong cols = shape::sizeAt(inputShape, -1); @@ -90,14 +94,14 @@ namespace helpers { std::vector lastDims({input->rankOf() - 2, input->rankOf() - 1}); std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), lastDims); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), lastDims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), lastDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), lastDims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), lastDims); const Nd4jLong numTads = packX.numberOfTads(); NDArray::prepareSpecialUse({output}, {input}); - matrixBandKernel<<>>(input->getSpecialBuffer(), - input->getSpecialShapeInfo(), output->getSpecialBuffer(), output->getSpecialShapeInfo(), + matrixBandKernel<<>>(input->specialBuffer(), + input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), lowerBand, upperBand, packX.specialShapeInfo(), packX.specialOffsets(), packZ.specialShapeInfo(), packZ.specialOffsets(), numTads, input->lengthOf()); NDArray::registerSpecialUse({output}, {input}); } @@ -106,7 +110,6 @@ namespace helpers { void matrixBandPart(sd::LaunchContext * context, NDArray* input, NDArray* output, 
Nd4jLong lowerBand, Nd4jLong upperBand) { BUILD_SINGLE_SELECTOR(input->dataType(), matrixBandPart_, (context, input, output, lowerBand, upperBand), FLOAT_TYPES); } - BUILD_SINGLE_TEMPLATE(template void matrixBandPart_, (sd::LaunchContext * context, NDArray* input, NDArray* output, Nd4jLong lowerBand, Nd4jLong upperBand), FLOAT_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu index 7d78d0323..30d5f0ef9 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu @@ -35,8 +35,8 @@ namespace helpers { // put diagonals from input batched matricies to output batched vectors template static __global__ void matrixDiagPartKernel(void const* inputBuffer, void* outputBuffer, Nd4jLong numTads, Nd4jLong inputLength, - Nd4jLong* tadOnlyInputShapeInfo, Nd4jLong *tadInputOffsets, - Nd4jLong* tadOnlyOutputShapeInfo, Nd4jLong *tadOutputOffsets) { + const Nd4jLong* tadOnlyInputShapeInfo, const Nd4jLong *tadInputOffsets, + const Nd4jLong* tadOnlyOutputShapeInfo, const Nd4jLong *tadOutputOffsets) { int totalThreads = blockDim.x; for (Nd4jLong i = blockIdx.x; i < numTads; i += gridDim.x) { auto yOffset = tadInputOffsets[i]; @@ -66,13 +66,13 @@ namespace helpers { Nd4jLong lastDimension = sd::math::nd4j_min(input->sizeAt(-2), input->sizeAt(-1)); std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(output->rankOf(), {output->rankOf() - 1}); - const Nd4jLong numTads = ShapeUtils::getNumOfSubArrs(input->getShapeInfo(), dimsToExclude); //this->tensorsAlongDimension({dimension}); + const Nd4jLong numTads = ShapeUtils::getNumOfSubArrs(input->shapeInfo(), dimsToExclude); //this->tensorsAlongDimension({dimension}); //printf("Repeat delta %lld, numTads %lld\n", repeatDelta, numTads); //tadOnlyInputShapeInfo, tadInputOffsets, tadOnlyOutputShapeInfo, tadOutputOffsets; std::vector 
outputDims({output->rankOf() - 1}); std::vector inputDims({input->rankOf() - 2, input->rankOf() - 1}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), inputDims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), outputDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), inputDims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), outputDims); if (!output->isActualOnDeviceSide()) @@ -83,7 +83,7 @@ namespace helpers { dim3 launchDims(256, 512, 8192); - matrixDiagPartKernel<<>>(input->getSpecialBuffer(), output->getSpecialBuffer(), numTads, lastDimension, packX.specialShapeInfo(), packX.specialOffsets(), packZ.specialShapeInfo(), packZ.specialOffsets()); + matrixDiagPartKernel<<>>(input->specialBuffer(), output->specialBuffer(), numTads, lastDimension, packX.specialShapeInfo(), packX.specialOffsets(), packZ.specialShapeInfo(), packZ.specialOffsets()); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu b/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu index b809647c1..6e70d4510 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu @@ -27,7 +27,7 @@ namespace ops { namespace helpers { template - static _CUDA_G void indicesFiller(void *vz, Nd4jLong *zShapeInfo, Nd4jLong part, Nd4jLong bSize) { + static _CUDA_G void indicesFiller(void *vz, Nd4jLong const* zShapeInfo, Nd4jLong part, Nd4jLong bSize) { auto z = reinterpret_cast(vz); for (int b = blockIdx.x; b < bSize; b += gridDim.x) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu index a7dd9b199..3c580ee33 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu @@ -34,7 
+34,7 @@ namespace sd { namespace helpers { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeMaxIndexCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeMaxIndexCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -62,11 +62,11 @@ namespace sd { static void mergeMaxIndex_(sd::LaunchContext* context, const std::vector& inArrs, NDArray& output) { int nArrSize = static_cast(inArrs.size()); - std::vector inBuffers(nArrSize), inShapes(nArrSize); + std::vector inBuffers(nArrSize), inShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeMaxIndex"); @@ -78,7 +78,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeMaxIndexCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, nArrSize, output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeMaxIndexCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, nArrSize, output.specialBuffer(), output.specialShapeInfo(), length); manager.synchronize(); } @@ -95,7 +95,7 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeMaxCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeMaxCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const 
Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -121,11 +121,11 @@ namespace sd { int nArrsSize = static_cast(inArrs.size()); - std::vector inBuffers(nArrsSize), inShapes(nArrsSize); + std::vector inBuffers(nArrsSize), inShapes(nArrsSize); for (int e = 0; e < nArrsSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeMax"); @@ -137,7 +137,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeMaxCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, nArrsSize, output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeMaxCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, nArrsSize, output.specialBuffer(), output.specialShapeInfo(), length); manager.synchronize(); } @@ -153,10 +153,15 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeMaxBpCudaLauncher(void** inArrs, void** inShapes, void* vgradient, Nd4jLong* gradientShape, const int numArrays, - void** outArrs, void** outShapes, Nd4jLong length, bool bSameOrderAndEws1) { + static __global__ void mergeMaxBpCudaLauncher( + void** inArrs, void** inShapes, + const void* vgradient, const Nd4jLong* gradientShape, + const int numArrays, + void** outArrs, void** outShapes, + Nd4jLong length, + bool bSameOrderAndEws1) { - auto grad = reinterpret_cast(vgradient); + auto grad = reinterpret_cast(vgradient); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -204,13 +209,13 @@ namespace sd { template static void mergeMaxBp_(sd::LaunchContext* context, const std::vector& inArrs, std::vector& outArrs, 
int nArrSize, bool bSameOrderAndEws1) { - std::vector inBuffers(nArrSize), inShapes(nArrSize), outBuffers(nArrSize), outShapes(nArrSize); + std::vector inBuffers(nArrSize), inShapes(nArrSize), outBuffers(nArrSize), outShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); - outBuffers[e] = outArrs[e]->getSpecialBuffer(); - outShapes[e] = outArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); + outBuffers[e] = outArrs[e]->specialBuffer(); + outShapes[e] = outArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeMaxBp"); @@ -226,8 +231,8 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeMaxBpCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, inArrs[nArrSize]->getSpecialBuffer(), - inArrs[nArrSize]->getSpecialShapeInfo(), nArrSize, pOutBuffers, pOutShapes, + mergeMaxBpCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, inArrs[nArrSize]->specialBuffer(), + inArrs[nArrSize]->specialShapeInfo(), nArrSize, pOutBuffers, pOutShapes, length, bSameOrderAndEws1); manager.synchronize(); @@ -261,7 +266,7 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAvgCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeAvgCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -284,11 +289,11 @@ namespace sd { template static void mergeAvg_(sd::LaunchContext* context, const std::vector& inArrs, NDArray& output) { - std::vector 
inBuffers(inArrs.size()), inShapes(inArrs.size()); + std::vector inBuffers(inArrs.size()), inShapes(inArrs.size()); for (int e = 0; e < inArrs.size(); e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAvg"); @@ -300,7 +305,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAvgCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, (int)inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeAvgCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, (int)inArrs.size(), output.specialBuffer(), output.specialShapeInfo(), length); manager.synchronize(); } @@ -315,10 +320,14 @@ namespace sd { } ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAvgBpCudaLauncher(void* vgradient, Nd4jLong* gradientShape, void** outArrs, void** outShapes, - const int numArrays, Nd4jLong length, bool bSameOrderAndEws1) { + static __global__ void mergeAvgBpCudaLauncher( + const void* vgradient, const Nd4jLong* gradientShape, + void** outArrs, void** outShapes, + const int numArrays, + Nd4jLong length, + bool bSameOrderAndEws1) { - auto grad = reinterpret_cast(vgradient); + auto grad = reinterpret_cast(vgradient); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -352,11 +361,11 @@ namespace sd { int nArrSize = static_cast(outArrs.size()); - std::vector outBuffers(nArrSize), outShapes(nArrSize); + std::vector outBuffers(nArrSize), outShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - outBuffers[e] = outArrs[e]->getSpecialBuffer(); - outShapes[e] = outArrs[e]->getSpecialShapeInfo(); + outBuffers[e] = outArrs[e]->specialBuffer(); + outShapes[e] = 
outArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAvgBp"); @@ -369,7 +378,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAvgBpCudaLauncher << getCudaStream() >> > (gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), + mergeAvgBpCudaLauncher<<getCudaStream()>>>(gradient.specialBuffer(), gradient.specialShapeInfo(), pOutBuffers, pOutShapes, nArrSize, length, bSameOrderAndEws1); manager.synchronize(); @@ -396,7 +405,7 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAddCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeAddCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); @@ -421,11 +430,11 @@ namespace sd { static void mergeAdd_(sd::LaunchContext* context, const std::vector& inArrs, NDArray& output) { int nArrSize = static_cast(inArrs.size()); - std::vector inBuffers(nArrSize), inShapes(nArrSize); + std::vector inBuffers(nArrSize), inShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAdd"); @@ -437,7 +446,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAddCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, nArrSize, output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeAddCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, nArrSize, output.specialBuffer(), 
output.specialShapeInfo(), length); manager.synchronize(); } @@ -454,10 +463,10 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAddBpCudaLauncher(void* vgradient, Nd4jLong* gradientShape, void** outArrs, void** outShapes, + static __global__ void mergeAddBpCudaLauncher(const void* vgradient, const Nd4jLong* gradientShape, void** outArrs, void** outShapes, const int numArrays, Nd4jLong length, bool bSameOrderAndEws1) { - auto grad = reinterpret_cast(vgradient); + auto grad = reinterpret_cast(vgradient); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -491,11 +500,11 @@ namespace sd { int nArrSize = static_cast(outArrs.size()); - std::vector outBuffers(nArrSize), outShapes(nArrSize); + std::vector outBuffers(nArrSize), outShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - outBuffers[e] = outArrs[e]->getSpecialBuffer(); - outShapes[e] = outArrs[e]->getSpecialShapeInfo(); + outBuffers[e] = outArrs[e]->specialBuffer(); + outShapes[e] = outArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAddBp"); @@ -508,7 +517,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAddBpCudaLauncher << getCudaStream() >> > (gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), + mergeAddBpCudaLauncher<<getCudaStream()>>>(gradient.specialBuffer(), gradient.specialShapeInfo(), pOutBuffers, pOutShapes, nArrSize, length, bSameOrderAndEws1); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu index 53570a0ba..3f2ed13b5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu @@ -88,12 +88,12 @@ namespace helpers { } PointersManager pm(context, "meshgrid"); - 
std::vector hInBuffers(rank); + std::vector hInBuffers(rank); std::vector hOutBuffers(rank); - std::vector hInShapes(rank); + std::vector hInShapes(rank); - std::vector hOutTadShapes(rank); - std::vector hOutTadOffsets(rank); + std::vector hOutTadShapes(rank); + std::vector hOutTadOffsets(rank); std::vector hNumTads(rank); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu index 4f26ef397..c3b4abc51 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu @@ -30,7 +30,7 @@ namespace ops { namespace helpers { template - static __global__ void fillUpElementKernel(void* outputBuffer, Nd4jLong* outputShapeInfo, void* inputBuffer, Nd4jLong* inputShapeInfo, Nd4jLong* pTadShape, Nd4jLong* pTadOffsets, Nd4jLong n) { + static __global__ void fillUpElementKernel(void* outputBuffer, Nd4jLong const* outputShapeInfo, void* inputBuffer, Nd4jLong const* inputShapeInfo, Nd4jLong const* pTadShape, Nd4jLong const* pTadOffsets, Nd4jLong n) { __shared__ Nd4jLong bufferLength; auto z = reinterpret_cast(outputBuffer); @@ -66,7 +66,7 @@ namespace helpers { else { // rank greater than 1 std::vector lastDims({input->rankOf() - 1});// = ShapeUtils::evalDimsToExclude(input->rankOf(), {input->rankOf() - 1}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.getShapeInfo(), lastDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), lastDims); auto pTadShape = packX.specialShapeInfo(); auto pTadShapeH = packX.primaryShapeInfo(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu b/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu index f1b87c1aa..f15200459 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu @@ -93,12 +93,12 @@ void onehot(const sd::LaunchContext* 
context, const NDArray *indices, NDArray *o const int threadsPerBlock = MAX_NUM_THREADS / 4; const int blocksPerGrid = (output->lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - const int sharedMem = threadsPerBlock * sizeof(decltype(*output->getShapeInfo())) * output->rankOf() + 128; + const int sharedMem = threadsPerBlock * sizeof(decltype(*output->shapeInfo())) * output->rankOf() + 128; PointersManager manager(context, "onehot"); NDArray::prepareSpecialUse({output}, {indices}); - BUILD_DOUBLE_SELECTOR(xType, zType, onehotCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices->getSpecialBuffer(), indices->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), axis, depth, on, off), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, onehotCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices->specialBuffer(), indices->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), axis, depth, on, off), LIBND4J_TYPES, LIBND4J_TYPES); NDArray::registerSpecialUse({output}, {indices}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/pad.cu b/libnd4j/include/ops/declarable/helpers/cuda/pad.cu index fc4d96ce0..842a41ced 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/pad.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/pad.cu @@ -139,7 +139,7 @@ namespace sd { const auto xType = input.dataType(); const auto yType = paddings.dataType(); - BUILD_DOUBLE_SELECTOR(xType, yType, padCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), mode, input.getSpecialBuffer(), input.getSpecialShapeInfo(), paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), padValue.getSpecialBuffer()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, padCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, 
context->getCudaStream(), mode, input.specialBuffer(), input.specialShapeInfo(), paddings.specialBuffer(), paddings.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), padValue.specialBuffer()), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({&output}, {&input, &paddings, &padValue}); manager.synchronize(); @@ -148,7 +148,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __global__ void mirrorPadLinearKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong leftSide, Nd4jLong leftSideCorrected, Nd4jLong xLen, Nd4jLong len, Nd4jLong zLen) { + static __global__ void mirrorPadLinearKernel(void const* vx, const Nd4jLong* xShape, void* vz, const Nd4jLong* zShape, Nd4jLong leftSide, Nd4jLong leftSideCorrected, Nd4jLong xLen, Nd4jLong len, Nd4jLong zLen) { __shared__ T const* x; __shared__ T* z; @@ -178,7 +178,7 @@ namespace sd { } template - static __global__ void mirrorPadKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong outLen, void const* paddings, Nd4jLong* paddingShape, int reflBorder) { + static __global__ void mirrorPadKernel(void const* vx, const Nd4jLong* xShape, void* vz, const Nd4jLong* zShape, Nd4jLong outLen, void const* paddings, const Nd4jLong* paddingShape, int reflBorder) { __shared__ F const* x; __shared__ I const* pads; @@ -247,11 +247,11 @@ namespace sd { const auto leftSideCorrected = leftSide - reflBorder; const Nd4jLong len = 2*(inLen-1) + leftSide + reflBorder; - mirrorPadLinearKernel<<<256, 512, 256, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftSide, leftSideCorrected, inLen, len, outLen); + mirrorPadLinearKernel<<<256, 512, 256, *stream>>>(input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftSide, leftSideCorrected, inLen, len, outLen); 
sd::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadLinearKernel(...) failed"); } else { - mirrorPadKernel<<<256, 256, 8192, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), outLen, paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), reflBorder); + mirrorPadKernel<<<256, 256, 8192, *stream>>>(input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), outLen, paddings.specialBuffer(), paddings.specialShapeInfo(), reflBorder); sd::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadKernel(...) failed"); } NDArray::registerSpecialUse({&output}, {&input, &paddings}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu index ebb067251..7f2bcdcfd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu @@ -30,7 +30,10 @@ namespace ops { namespace helpers { template - static _CUDA_G void percentileKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, const Nd4jLong numTads, const Nd4jLong tadLength, void *vz, Nd4jLong *zShapeInfo, const Nd4jLong zLength, const Nd4jLong position) { + static _CUDA_G void percentileKernel(void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong numTads, const Nd4jLong tadLength, + void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong zLength, + const Nd4jLong position) { for (int t = blockIdx.x; t < numTads; t += gridDim.x) { auto x = reinterpret_cast(vx) + xTadOffsets[t]; auto z = reinterpret_cast(vz); @@ -93,8 +96,8 @@ namespace helpers { else shape::checkDimensions(inputRank, axis); - auto tempArray = input.dup(input.ordering()); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(tempArray.getShapeInfo(), axis); + auto tempArray = input.dup(); + auto packX = 
ConstantTadHelper::getInstance()->tadForDimensions(tempArray.shapeInfo(), axis); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu b/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu index 2f96d96e7..3e82632e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu @@ -90,7 +90,7 @@ void polyGamma(sd::LaunchContext * context, const NDArray& n, const NDArray& x, int threadsPerBlock = MAX_NUM_THREADS / 2; int blocksPerGrid = (z.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - BUILD_SINGLE_SELECTOR(n.dataType(), polyGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), n.getSpecialBuffer(), n.getSpecialShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(n.dataType(), polyGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), n.specialBuffer(), n.specialShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), z.specialBuffer(), z.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&z}, {&n, &x}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu index 3d1fd104a..d2832ec80 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu @@ -148,8 +148,8 @@ static void prefixPerBlockCudaLauncher(const int blocksPerGrid, const int thread /////////////////////////////////////////////////////////////////// void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArray* z, const std::vector& dims, bool exclusive, bool reverse) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); - auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); const Nd4jLong numTads = packX.numberOfTads(); const Nd4jLong tadLen = x->lengthOf() / numTads; @@ -161,7 +161,7 @@ void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArr PointersManager manager(context, "prefix"); NDArray::prepareSpecialUse({z}, {x}); - BUILD_SINGLE_SELECTOR(x->dataType(), prefixPerBlockCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, x->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), z->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), numTads, tadLen, exclusive, reverse), NUMERIC_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), prefixPerBlockCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, x->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), z->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), numTads, tadLen, exclusive, reverse), NUMERIC_TYPES); NDArray::registerSpecialUse({z}, {x}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu b/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu index a518ddd72..6733ce642 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu @@ -51,7 +51,7 @@ namespace sd { NDArray::prepareSpecialUse({}, {&array}); PointersManager pm(&ctx, "print_device"); - BUILD_SINGLE_SELECTOR(array.dataType(), exec_print_device, (ctx, array.getSpecialBuffer(), array.getSpecialShapeInfo()), LIBND4J_TYPES) + BUILD_SINGLE_SELECTOR(array.dataType(), exec_print_device, (ctx, array.specialBuffer(), array.specialShapeInfo()), LIBND4J_TYPES) 
pm.synchronize(); NDArray::registerSpecialUse({}, {&array}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu index 394840376..828867b4e 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu @@ -70,7 +70,7 @@ namespace helpers { /* m = I - v v^T */ template - static __global__ void vmulKernel(T* resBuf, Nd4jLong* resShape, T const* vBuff, Nd4jLong const* vShape, Nd4jLong n) { + static __global__ void vmulKernel(T* resBuf, const Nd4jLong* resShape, T const* vBuff, Nd4jLong const* vShape, Nd4jLong n) { for (auto i = blockIdx.x; i < n; i += gridDim.x) for (auto j = threadIdx.x; j < n; j += blockDim.x) { Nd4jLong posR[] = {i, j}; @@ -89,7 +89,7 @@ namespace helpers { auto stream = context->getCudaStream(); vmulKernel<<<128, 128, 128, *stream>>>(res.dataBuffer()->specialAsT(), res.specialShapeInfo(), - reinterpret_cast(v.getSpecialBuffer()), v.getSpecialShapeInfo(), n); + reinterpret_cast(v.specialBuffer()), v.specialShapeInfo(), n); return res; } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/random.cu b/libnd4j/include/ops/declarable/helpers/cuda/random.cu index 59f22d878..fe692a0df 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/random.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/random.cu @@ -44,8 +44,8 @@ namespace helpers { * output - distributed output. 
* */ template - static __global__ void fillGammaKernel(T* uList, Nd4jLong uLength, T* alpha, Nd4jLong* alphaShape, - T* beta, Nd4jLong* betaShape, T* output, Nd4jLong* outputShape) { + static __global__ void fillGammaKernel(T* uList, Nd4jLong uLength, T* alpha, const Nd4jLong* alphaShape, + T* beta, const Nd4jLong* betaShape, T* output, const Nd4jLong* outputShape) { // fill up __shared__ Nd4jLong aLength; if (threadIdx.x == 0) { @@ -70,7 +70,7 @@ namespace helpers { template static void fillRandomGamma_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output) { // To fill up output need to broadcast alpha and beta to the same shape and in - Nd4jLong* broadcasted = nullptr; + const Nd4jLong* broadcasted = nullptr; if (beta != nullptr) ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcasted, context->getWorkspace()); else @@ -136,8 +136,8 @@ namespace helpers { return x. * */ template - static __global__ void fillPoissonKernel(T* uList, Nd4jLong uLength, T* lambda, Nd4jLong* lambdaShape, T* output, - Nd4jLong* outputShape) { + static __global__ void fillPoissonKernel(T* uList, Nd4jLong uLength, T* lambda, const Nd4jLong* lambdaShape, + T* output, const Nd4jLong* outputShape) { __shared__ Nd4jLong step; @@ -186,7 +186,7 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template void fillRandomPoisson_, (LaunchContext* context, graph::RandomGenerator& rng, NDArray* lambda, NDArray* output), FLOAT_NATIVE); template - static __global__ void fillUniformKernel(graph::RandomGenerator* devRng, T from, T to, T* output, Nd4jLong* outputShape) { + static __global__ void fillUniformKernel(graph::RandomGenerator* devRng, T from, T to, T* output, const Nd4jLong* outputShape) { auto start = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -247,9 +247,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(output->dataType(), fillRandomUniform_, (context, rng, min, max, output), NUMERIC_TYPES); } - 
BUILD_SINGLE_TEMPLATE(template void fillRandomUniform_, (LaunchContext* context, - graph::RandomGenerator& rng, NDArray* min, NDArray* max, NDArray* output), NUMERIC_TYPES); - /////////////////////////////////////////////////////////////////// // used https://en.wikipedia.org/wiki/Categorical_distribution // methods: gumbel trick + softmax + argmax @@ -346,8 +343,8 @@ void fillRandomMultiNomial(LaunchContext* context, graph::RandomGenerator& rng, NDArray::prepareSpecialUse({ &output }, { &input }); BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), fillMultiNomialCudaLauncher, - (blocksPerGrid, threadsPerBlock, context->getCudaStream(), devRng, input.getSpecialBuffer(), - input.getSpecialShapeInfo(), output.specialBuffer(), + (blocksPerGrid, threadsPerBlock, context->getCudaStream(), devRng, input.specialBuffer(), + input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), batchValue, numOfSamples, numOfClassX, dimA), FLOAT_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({ &output }, { &input }); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/range.cu b/libnd4j/include/ops/declarable/helpers/cuda/range.cu index 668518d82..e33f95c52 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/range.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/range.cu @@ -39,7 +39,7 @@ namespace helpers { // be careful: outVector must have c-order and ews = 1 !!! 
template static void _range(sd::LaunchContext * context, const NDArray& start, const NDArray& delta, NDArray& outVector) { - global_range<<<512, 512, 2048, *context->getCudaStream()>>>(outVector.getSpecialBuffer(), outVector.lengthOf(), start.e(0), delta.e(0)); + global_range<<<512, 512, 2048, *context->getCudaStream()>>>(outVector.specialBuffer(), outVector.lengthOf(), start.e(0), delta.e(0)); } void range(sd::LaunchContext * context, const NDArray& start, const NDArray& delta, NDArray& outVector) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu index 793d90f91..b6bbeea4c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu @@ -31,8 +31,8 @@ namespace ops { namespace helpers { template - static __global__ void reverseTadKernel(void* vinput, Nd4jLong *inputShape, void* voutput, Nd4jLong *outputShape, Nd4jLong *inputTadShape, Nd4jLong *inputTadOffsets, Nd4jLong *outputTadShape, Nd4jLong *outputTadOffsets, uint64_t limit, uint64_t numOfElemsToReverse, uint64_t numTads) { - auto input = reinterpret_cast(vinput); + static __global__ void reverseTadKernel(const void* vinput, const Nd4jLong *inputShape, void* voutput, const Nd4jLong *outputShape, const Nd4jLong *inputTadShape, const Nd4jLong *inputTadOffsets, const Nd4jLong *outputTadShape, const Nd4jLong *outputTadOffsets, uint64_t limit, uint64_t numOfElemsToReverse, uint64_t numTads) { + auto input = reinterpret_cast(vinput); auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -92,11 +92,11 @@ namespace helpers { template - static __global__ void reverseArrayKernel(void* input, Nd4jLong *inputShape, void* output, Nd4jLong *outputShape, Nd4jLong numOfElemsToReverse) { + static __global__ void reverseArrayKernel(const void* input, const Nd4jLong *inputShape, void* output, const 
Nd4jLong *outputShape, Nd4jLong numOfElemsToReverse) { const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; __shared__ int linearStatus; - __shared__ T* inputArr; + __shared__ const T* inputArr; __shared__ T* outputArr; __shared__ char inputOrder, outputOrder; @@ -105,7 +105,7 @@ namespace helpers { char inputOrder = shape::order(inputShape); char outputOrder = shape::order(outputShape); - inputArr = reinterpret_cast(input); + inputArr = reinterpret_cast(input); outputArr = reinterpret_cast(output); } __syncthreads(); @@ -141,9 +141,9 @@ namespace helpers { } template - static void reverseTad(sd::LaunchContext * context, const NDArray* input, NDArray* output, Nd4jLong *inputTadShape, Nd4jLong *inputTadOffsets, Nd4jLong *outputTadShape, Nd4jLong *outputTadOffsets, uint64_t tadLength) { + static void reverseTad(sd::LaunchContext * context, const NDArray* input, NDArray* output, const Nd4jLong *inputTadShape, const Nd4jLong *inputTadOffsets, const Nd4jLong *outputTadShape, const Nd4jLong *outputTadOffsets, uint64_t tadLength) { auto stream = context->getCudaStream(); - reverseTadKernel<<<256, 512, 8192, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), inputTadShape, inputTadOffsets, outputTadShape, outputTadOffsets, input->lengthOf(), tadLength, input->lengthOf() / tadLength); + reverseTadKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), inputTadShape, inputTadOffsets, outputTadShape, outputTadOffsets, input->lengthOf(), tadLength, input->lengthOf() / tadLength); } template @@ -153,7 +153,7 @@ namespace helpers { if (numOfElemsToReverse == 0) numOfReverse = input->lengthOf(); - reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse); + 
reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse); } @@ -164,12 +164,12 @@ namespace helpers { seqLengths->syncToHost(); auto stream = context->getCudaStream(); - if(input->isVector() || shape::isLikeVector(input->getShapeInfo(), posOfNonUnityDim) || seqLengths->lengthOf() == 1) { + if(input->isVector() || shape::isLikeVector(input->shapeInfo(), posOfNonUnityDim) || seqLengths->lengthOf() == 1) { int numOfElemsToReverse = seqLengths->e(0); if((seqDim == 0 && input->sizeAt(0) == 1) || (batchDim == posOfNonUnityDim)) output->assign(input); else - reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfElemsToReverse);//helpers::reverseArray(context, const_cast(input), output, numOfElemsToReverse); + reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfElemsToReverse);//helpers::reverseArray(context, const_cast(input), output, numOfElemsToReverse); } else { @@ -202,7 +202,7 @@ namespace helpers { NDArray::prepareSpecialUse({output}, {input, seqLengths}); // if op isn't inplace - copy original data into output array - if (output->getSpecialBuffer() != input->getSpecialBuffer()) + if (output->specialBuffer() != input->specialBuffer()) output->assign(input); BUILD_SINGLE_SELECTOR(input->dataType(), reverseSequence_, (context, input, seqLengths, output, seqDim, batchDim), LIBND4J_TYPES); @@ -214,8 +214,8 @@ namespace helpers { // we need to reverse axis only if that's new op std::vector dimensions = isBackProp ? 
ShapeUtils::evalDimsToExclude(input->rankOf(), *intArgs) : *intArgs; std::vector axis = ShapeUtils::evalDimsToExclude(input->rankOf(), dimensions); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); @@ -229,9 +229,6 @@ namespace helpers { NDArray::registerSpecialUse({output}, {input}); } - -BUILD_SINGLE_TEMPLATE(template void reverseArray, (sd::LaunchContext * context, const NDArray *inArr, NDArray *outArr, Nd4jLong numOfElemsToReverse), LIBND4J_TYPES); - } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu index d014b9115..773f7279d 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu @@ -27,8 +27,8 @@ namespace ops { namespace helpers { template - static void _CUDA_D rollKernelLinearStage1Dev(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_D rollKernelLinearStage1Dev(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -69,13 +69,13 @@ namespace helpers { } template - static void _CUDA_G rollKernelLinearStage1(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { + static void _CUDA_G rollKernelLinearStage1(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int 
actualShift) { rollKernelLinearStage1Dev(vx, xShapeInfo, vz, zShapeInfo, fullLength, actualShift); } template - static void _CUDA_G rollKernelLinearStage2(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int shiftCount) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelLinearStage2(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int shiftCount) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -126,8 +126,8 @@ namespace helpers { } template - static void _CUDA_G rollKernelLinearStage3(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int remainShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelLinearStage3(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int remainShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -170,7 +170,7 @@ namespace helpers { } template - static void _CUDA_D swapTadsKernel(void *vx, void *vz, Nd4jLong *zShapeInfo, Nd4jLong tadLength) { + static void _CUDA_D swapTadsKernel(void *vx, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong tadLength) { auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); @@ -202,8 +202,8 @@ namespace helpers { } template - static void _CUDA_G rollKernelFullAnyDimensionStage1(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelFullAnyDimensionStage1(const void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, void *vz, const Nd4jLong *zTadShapeInfo, const Nd4jLong 
*zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); for (int e = blockIdx.x + theShift; e < sizeAt - theShift; e += gridDim.x) { @@ -215,8 +215,8 @@ namespace helpers { } template - static void _CUDA_G rollKernelFullAnyDimensionStage2(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelFullAnyDimensionStage2(void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, void *vz, const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); for (int e = blockIdx.x; e < theShift; e += gridDim.x) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu b/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu index 5784699d0..8b7bfb2b5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu @@ -108,7 +108,7 @@ void batchToSpace(sd::LaunchContext* context, const NDArray& input, NDArray& out PointersManager manager(context, "batchToSpace"); NDArray::prepareSpecialUse({&output}, {&inputRearranged1}); - BUILD_SINGLE_SELECTOR(input.dataType(), batchToSpaceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.getSpecialBuffer(), inputRearranged1.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), cropBottom, cropLeft), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), batchToSpaceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.specialBuffer(), inputRearranged1.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), 
cropBottom, cropLeft), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&inputRearranged1}); manager.synchronize(); @@ -239,7 +239,7 @@ void batchToSpaceND(sd::LaunchContext* context, const NDArray& input, const NDAr PointersManager manager(context, "batchToSpaceND"); NDArray::prepareSpecialUse({&output}, {&inputRearranged1, &crop}); - BUILD_DOUBLE_SELECTOR(input.dataType(), crop.dataType(), batchToSpaceNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.getSpecialBuffer(), inputRearranged1.getSpecialShapeInfo(), crop.getSpecialBuffer(), crop.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), crop.dataType(), batchToSpaceNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.specialBuffer(), inputRearranged1.specialShapeInfo(), crop.specialBuffer(), crop.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&output}, {&inputRearranged1, &crop}); manager.synchronize(); @@ -331,12 +331,12 @@ void spaceToBatch(sd::LaunchContext* context, const NDArray& input, NDArray& out PointersManager manager(context, "spaceToBatch"); NDArray::prepareSpecialUse({&outputRearranged1}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), padBottom, padTop, padLeft, padRight), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), 
padBottom, padTop, padLeft, padRight), LIBND4J_TYPES); NDArray::registerSpecialUse({&outputRearranged1}, {&input}); manager.synchronize(); - if(output.getSpecialBuffer() != outputRearranged1.getSpecialBuffer()) + if(output.specialBuffer() != outputRearranged1.specialBuffer()) outputRearranged0.assign(outputRearranged1); } } @@ -478,12 +478,12 @@ void spaceToBatchND(sd::LaunchContext* context, const NDArray& input, const NDAr PointersManager manager(context, "spaceToBatchND"); NDArray::prepareSpecialUse({&outputRearranged1}, {&input, &padding}); - BUILD_DOUBLE_SELECTOR(input.dataType(), padding.dataType(), spaceToBatchNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), padding.getSpecialBuffer(), padding.getSpecialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), padding.dataType(), spaceToBatchNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), padding.specialBuffer(), padding.specialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&outputRearranged1}, {&input, &padding}); manager.synchronize(); - if(output.getSpecialBuffer() != outputRearranged1.getSpecialBuffer()) + if(output.specialBuffer() != outputRearranged1.specialBuffer()) outputRearranged0.assign(outputRearranged1); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu b/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu index 4290a57c6..19a1937dd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu @@ -24,8 +24,12 @@ namespace sd { namespace ops { namespace helpers { template - static _CUDA_G void 
spaceToDepthKernel(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, const int block_size, const bool isNHWC) { - auto input_ptr = reinterpret_cast(vx); + static _CUDA_G void spaceToDepthKernel( + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const int block_size, + const bool isNHWC) { + auto input_ptr = reinterpret_cast(vx); auto output_ptr = reinterpret_cast(vz); const int batch_size = shape::sizeAt(xShapeInfo, 0); @@ -91,7 +95,7 @@ namespace helpers { template static void _spaceTodepth_(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - spaceToDepthKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); + spaceToDepthKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.specialBuffer(), input.specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); } void _spaceTodepth(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { @@ -99,9 +103,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(input.dataType(), _spaceTodepth_, (context, input, output, block_size, isNHWC), LIBND4J_TYPES); NDArray::registerSpecialUse({output}, {&input}); } - - BUILD_SINGLE_TEMPLATE(template void _spaceTodepth_, (sd::LaunchContext *context, const NDArray &input, NDArray *output, int block_size, bool isNHWC), LIBND4J_TYPES); - } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu index 364ad83d2..94b0e0080 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu @@ -96,7 +96,7 @@ Nd4jLong checkIndices(sd::LaunchContext *context, const NDArray& indices, const NDArray numOfBadIndx(sd::DataType::INT64, 
context, true); NDArray::prepareSpecialUse({&numOfBadIndx}, {&indices}); - BUILD_SINGLE_SELECTOR(xType, checkIndicesCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), reinterpret_cast(numOfBadIndx.getSpecialBuffer()), output.getSpecialShapeInfo(), axis), INDEXING_TYPES); + BUILD_SINGLE_SELECTOR(xType, checkIndicesCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.specialBuffer(), indices.specialShapeInfo(), reinterpret_cast(numOfBadIndx.specialBuffer()), output.specialShapeInfo(), axis), INDEXING_TYPES); NDArray::registerSpecialUse({&numOfBadIndx}, {&indices}); manager.synchronize(); @@ -346,7 +346,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic PointersManager manager(context, "scatter"); NDArray::prepareSpecialUse({&output}, {&updates, &indices}); - BUILD_DOUBLE_SELECTOR(xType, yType, scatterCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, scatterCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); NDArray::registerSpecialUse({&output}, {&updates, &indices}); manager.synchronize(); @@ -612,7 +612,7 @@ void scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind PointersManager manager(context, "scatterND"); NDArray::prepareSpecialUse({&output}, {&updates, &indices}); - BUILD_DOUBLE_SELECTOR(xType, yType, scatterNDCudaLauncher, (blocksPerGrid, 
threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, scatterNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); NDArray::registerSpecialUse({&output}, {&updates, &indices}); manager.synchronize(); @@ -682,12 +682,12 @@ void scatterForLoss(sd::LaunchContext* context, const NDArray& indices, NDArray& if(calcGrad) { NDArray::prepareSpecialUse({&updates}, {&indices}); - BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), nullptr, nullptr), INDEXING_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), nullptr, nullptr), INDEXING_TYPES, FLOAT_TYPES); NDArray::registerSpecialUse({&updates}, {&indices}); } else { NDArray::prepareSpecialUse({&output}, {&indices, &updates}); - BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), INDEXING_TYPES, FLOAT_TYPES); 
+ BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), INDEXING_TYPES, FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&indices, &updates}); } @@ -736,8 +736,8 @@ __global__ static void scatterLockCuda(const int opCode, std::vector yTadDims(sizeOfUpdDims); std::iota(yTadDims.begin(), yTadDims.end(), 0); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims)); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), zTadDims); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims)); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims); const Nd4jLong zTadLen = shape::length(packZ.primaryShapeInfo()); const Nd4jLong yTadLen = shape::length(packY.primaryShapeInfo()); @@ -748,7 +748,7 @@ __global__ static void scatterLockCuda(const int opCode, const auto xType = indices.dataType(); const auto yType = updates.dataType(); - BUILD_DOUBLE_SELECTOR(xType, yType, scatterLockCudaLauncher, (blocksPerGrid, threadsPerBlock, 1024, context->getCudaStream(), op, indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), packY.specialShapeInfo(), packY.specialOffsets(), output.getSpecialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets(), indices.lengthOf(), yTadLen, zTadLen), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, scatterLockCudaLauncher, (blocksPerGrid, threadsPerBlock, 1024, context->getCudaStream(), op, indices.specialBuffer(), indices.specialShapeInfo(), 
updates.specialBuffer(), packY.specialShapeInfo(), packY.specialOffsets(), output.specialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets(), indices.lengthOf(), yTadLen, zTadLen), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); @@ -963,8 +963,8 @@ __global__ static void scatterLockCuda(const int opCode, std::vector dims = {0}; auto inverted = ShapeUtils::evalDimsToExclude(output.rankOf(), dims); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), inverted); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), inverted); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), inverted); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), inverted); auto psX = packX.specialShapeInfo(); auto psY = packY.specialShapeInfo(); @@ -984,9 +984,9 @@ __global__ static void scatterLockCuda(const int opCode, auto blockSize = sd::math::nd4j_max(32, sd::math::nd4j_min(tadLengthX, 1024)); if (lock) - scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.getSpecialBuffer(), psX, poX, updates.getSpecialBuffer(), psY, poY, reinterpret_cast(indices.getSpecialBuffer()), tadLengthX, tadLengthY); + scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.specialBuffer(), psX, poX, updates.specialBuffer(), psY, poY, reinterpret_cast(indices.specialBuffer()), tadLengthX, tadLengthY); else - scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.getSpecialBuffer(), psX, poX, updates.getSpecialBuffer(), psY, poY, reinterpret_cast(indices.getSpecialBuffer()), tadLengthX, tadLengthY); + scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.specialBuffer(), psX, poX, updates.specialBuffer(), psY, poY, reinterpret_cast(indices.specialBuffer()), tadLengthX, tadLengthY); 
NDArray::registerSpecialUse({&output}, {&updates, &indices}); manager.synchronize(); @@ -1016,9 +1016,9 @@ const int xLastDim = indices.sizeAt(-1); zTadDims[i] = zRank - 1 - j; } - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(indices.getShapeInfo(), {xRank - 1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), yTadDims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), zTadDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(indices.shapeInfo(), {xRank - 1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), yTadDims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims); const int threadsPerBlock = MAX_NUM_THREADS / 4; const int blocksPerGrid = packZ.numberOfTads(); @@ -1151,11 +1151,11 @@ const int xLastDim = indices.sizeAt(-1); // PointersManager::printDevContentOnDev(yShapeInfo, 8); // PointersManager::printDevContentOnDev(zShapeInfo, 8); - // manager.printDevContentOnHost(indices.getSpecialBuffer(), indices.lengthOf()); - // manager.printDevContentOnHost(indices.getSpecialShapeInfo(), shape::shapeInfoLength(indices.rankOf())); - // manager.printDevContentOnHost(updates.getSpecialBuffer(), updates.lengthOf()); - // manager.printDevContentOnHost(updates.getSpecialShapeInfo(), shape::shapeInfoLength(updates.rankOf())); - // manager.printDevContentOnHost(output.getSpecialShapeInfo(), shape::shapeInfoLength(output.rankOf())); + // manager.printDevContentOnHost(indices.specialBuffer(), indices.lengthOf()); + // manager.printDevContentOnHost(indices.specialShapeInfo(), shape::shapeInfoLength(indices.rankOf())); + // manager.printDevContentOnHost(updates.specialBuffer(), updates.lengthOf()); + // manager.printDevContentOnHost(updates.specialShapeInfo(), shape::shapeInfoLength(updates.rankOf())); + // 
manager.printDevContentOnHost(output.specialShapeInfo(), shape::shapeInfoLength(output.rankOf())); // printf("!!!!!!!\n"); // manager.printDevContentOnHost(packX.specialShapeInfo(), 2*shape::rank(packX.primaryShapeInfo()) + 4); // manager.printDevContentOnHost(packX.specialOffsets(), packX.numberOfTads()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu index 277a1f587..a17464cbd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu @@ -33,9 +33,9 @@ namespace sd { namespace ops { namespace helpers { template - static _CUDA_G void scatterSimpleKernel(void *vx, Nd4jLong *xTadShape, Nd4jLong *xTadOffsets, Nd4jLong xLength, Nd4jLong numTads, void *vi, Nd4jLong *iShapeInfo, Nd4jLong iLength, void *vu, Nd4jLong *uShapeInfo, Nd4jLong uLength) { - auto u = reinterpret_cast(vu); - auto indices = reinterpret_cast(vi); + static _CUDA_G void scatterSimpleKernel(void *vx, const Nd4jLong *xTadShape, const Nd4jLong *xTadOffsets, Nd4jLong xLength, Nd4jLong numTads, const void *vi, const Nd4jLong *iShapeInfo, Nd4jLong iLength, const void *vu, const Nd4jLong *uShapeInfo, Nd4jLong uLength) { + auto u = reinterpret_cast(vu); + auto indices = reinterpret_cast(vi); auto tid = threadIdx.x + blockIdx.x * blockDim.x; for (int i = tid; i < iLength; i += blockDim.x * gridDim.x) { @@ -51,13 +51,13 @@ namespace sd { void scatterSimple_(sd::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector& dimensions) { auto dims = ShapeUtils::evalDimsToExclude(input.rankOf(), dimensions); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dims); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dims); auto xLength = shape::length(packX.primaryShapeInfo()); auto iLength = 
indices.lengthOf(); auto uLength = updates.lengthOf(); - scatterSimpleKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), xLength, packX.numberOfTads(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), iLength, updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), uLength); + scatterSimpleKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), xLength, packX.numberOfTads(), indices.specialBuffer(), indices.specialShapeInfo(), iLength, updates.specialBuffer(), updates.specialShapeInfo(), uLength); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu index 748a2e6a3..51f917a79 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu @@ -114,15 +114,15 @@ namespace sd { for (int e = 2; e < 2 + numOfDims; e++) tadDimensions[e-2] = (*intArgs)[e]; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), tadDimensions); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), tadDimensions); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), tadDimensions); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), tadDimensions); NDArray indices(const_cast(intArgs->data()) + numOfDims + 3, 'c', {numOfInd}, sd::DataType::INT32, context); PointersManager manager(context, "scatterUpdate"); NDArray::prepareSpecialUse({&input}, {&input, &updates, &indices}); - BUILD_SINGLE_SELECTOR(input.dataType(), scatterUpdateCudaLauncher, (context->getCudaStream(), opCode, numOfInd, input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), updates.specialBuffer(), packY.platformShapeInfo(), 
packY.platformOffsets(), reinterpret_cast(indices.getSpecialBuffer())), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), scatterUpdateCudaLauncher, (context->getCudaStream(), opCode, numOfInd, input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), updates.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), reinterpret_cast(indices.specialBuffer())), LIBND4J_TYPES); NDArray::registerSpecialUse({&input}, {&input, &updates, &indices}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment.cu index 796dd6a1e..60d00fb60 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment.cu @@ -47,7 +47,7 @@ namespace helpers { // Unsorted segment ops functors implementation // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentIndexValidateKernel(I* indices, Nd4jLong* indicesShape, I expected, I* found) { + static __global__ void unsortedSegmentIndexValidateKernel(const I* indices, const Nd4jLong* indicesShape, I expected, I* found) { __shared__ bool onlyTrue; __shared__ Nd4jLong len; @@ -90,12 +90,12 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // fill up segments starts and ends - splitted ordered case template - static __global__ void fillUpSegmentsKernel(void* indices, Nd4jLong* indexShape, int numClasses, int* classesRangesStart, int* classesRangesLenghts) { - __shared__ I* idxBuf; + static __global__ void fillUpSegmentsKernel(const void* indices, const Nd4jLong* indexShape, int numClasses, int* classesRangesStart, int* classesRangesLenghts) { + __shared__ const I* idxBuf; __shared__ Nd4jLong idxLen; __shared__ int* result; if (threadIdx.x == 0) { - idxBuf = 
reinterpret_cast(indices); + idxBuf = reinterpret_cast(indices); idxLen = shape::length(indexShape); } __syncthreads(); @@ -115,8 +115,8 @@ namespace helpers { template static void fillUpSegments_(NDArray* indices, Nd4jLong numClasses, NDArray& classesRangesBegs, NDArray& classesRangesLens) { dim3 dims(numClasses, indices->lengthOf(), numClasses * 32 + 32); - int* begins = reinterpret_cast(classesRangesBegs.getSpecialBuffer()); - int* lengths = reinterpret_cast(classesRangesLens.getSpecialBuffer()); + int* begins = reinterpret_cast(classesRangesBegs.specialBuffer()); + int* lengths = reinterpret_cast(classesRangesLens.specialBuffer()); auto stream = classesRangesBegs.getContext()->getCudaStream(); fillUpSegmentsKernel<<>>(indices->specialBuffer(), indices->specialShapeInfo(), numClasses, begins, lengths); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu index e7baf2370..927b1bb2f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu @@ -38,8 +38,8 @@ namespace sd { template static __global__ void - segmentMaxLinearKernel(void *input, Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, - void *output, Nd4jLong *outputShape) { + segmentMaxLinearKernel(void *input, Nd4jLong const* inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, Nd4jLong const* outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, zIndex; __shared__ T *x; @@ -77,9 +77,9 @@ namespace sd { template static __global__ void - unsortedSegmentMaxLinearKernel(void *input, Nd4jLong *inputShape, void *indices, Nd4jLong *indicesShape, + unsortedSegmentMaxLinearKernel(void *input, Nd4jLong const* inputShape, void *indices, Nd4jLong const* indicesShape, int *starts, int *lengths, Nd4jLong numOfClasses, void *output, - Nd4jLong *outputShape) { + Nd4jLong const* outputShape) { __shared__ T 
*val; __shared__ Nd4jLong xLen, zLen, zIndex; __shared__ T *x; @@ -114,9 +114,9 @@ namespace sd { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMaxTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, - Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, - Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets, T filler = 0) { + static __global__ void segmentMaxTadKernel(void* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, + Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, + Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets, T filler = 0) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; @@ -185,12 +185,12 @@ namespace sd { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentMaxTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, 
reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } NDArray::registerSpecialUse({output}, {input, indices, &classesRangesBegs, &classesRangesLens}); @@ -218,20 +218,20 @@ namespace sd { dim3 dims(numOfClasses, indices->lengthOf(), numOfClasses * 32 + 32); // int* classesBuf = reinterpret_cast(classes.specialBuffer()); fillUpSegments(indices, numOfClasses, classesRangesBegs, classesRangesLens); - int* begins = reinterpret_cast(classesRangesBegs.getSpecialBuffer()); - int* lengths = reinterpret_cast(classesRangesLens.getSpecialBuffer()); + int* begins = reinterpret_cast(classesRangesBegs.specialBuffer()); + int* lengths = reinterpret_cast(classesRangesLens.specialBuffer()); if (input->isVector()) { unsortedSegmentMaxLinearKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo()); } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); 
output->assign(-DataTypeUtils::max()); segmentMaxTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); @@ -250,9 +250,9 @@ namespace sd { // segment max // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMaxBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentMaxBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -291,12 +291,12 @@ namespace sd { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMaxBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradInTad, Nd4jLong* gradInOffsets, - Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, - Nd4jLong* outOffsets) { + static __global__ void segmentMaxBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape,Nd4jLong const* inputTad, + Nd4jLong const* 
inputOffsets, Nd4jLong const* gradInTad, Nd4jLong const* gradInOffsets, + Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, + Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -349,18 +349,18 @@ namespace sd { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradInTads = packGradIn.specialShapeInfo(); + Nd4jLong 
const* gradInTadOffsets = packGradIn.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMaxBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -397,18 +397,18 @@ namespace sd { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = 
packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradInTads = packGradIn.specialShapeInfo(); + Nd4jLong const* gradInTadOffsets = packGradIn.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMaxBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu index 76036a5e6..c75293c1d 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu @@ -34,7 +34,7 @@ namespace helpers { // Segment ops linear kernels // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMeanLinearKernel(void* input, Nd4jLong* inputShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong* outputShape) { + static __global__ void segmentMeanLinearKernel(void* input, Nd4jLong const* inputShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong const* outputShape) { __shared__ T* val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ T* x; @@ -72,7 +72,7 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentMeanLinearKernel(void* input, Nd4jLong* inputShape, void* indices, Nd4jLong* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong* outputShape) { + static __global__ void unsortedSegmentMeanLinearKernel(void* input, Nd4jLong const* inputShape, 
void* indices, Nd4jLong const* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong const* outputShape) { __shared__ T* val; __shared__ Nd4jLong xLen, zLen, zIndex; __shared__ T* x; @@ -115,7 +115,7 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentMean kernel template - static __global__ void segmentMeanTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentMeanTadKernel(void* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; __shared__ T* z; @@ -174,12 +174,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto 
outputTadOffsets = packZ.specialOffsets(); segmentMeanTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } NDArray::registerSpecialUse({output}, {input, indices}); @@ -216,12 +216,12 @@ namespace helpers { else { output->assign(0); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentMeanTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -237,8 +237,8 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMeanBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, 
Nd4jLong* indicesShape, - int* lengths, void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentMeanBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + int* lengths, void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -272,9 +272,9 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMeanBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, int* lengths, void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, Nd4jLong* outOffsets) { + static __global__ void segmentMeanBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, + void* indicesBuf, Nd4jLong const* indicesShape, int* lengths, void* outputBuf, Nd4jLong const* outputShape,Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradOut; __shared__ I* y; @@ -333,16 +333,16 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); -// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - 
Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); +// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMeanBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), lengths, @@ -386,16 +386,16 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); -// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* 
outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); +// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMeanBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), lengths, diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu index 0133b3b11..c6f2d4ed2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu @@ -36,11 +36,11 @@ namespace helpers { template static __global__ void - segmentMinLinearKernel(void *input, Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, - void *output, Nd4jLong *outputShape) { + segmentMinLinearKernel(const void *input, const Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, zIndex; - 
__shared__ T *x; + __shared__ const T *x; __shared__ T *z; __shared__ int threadsPerSegment, start, finish; @@ -48,7 +48,7 @@ namespace helpers { if (threadIdx.x == 0) { // threadsPerSegment = (gridDim.x + numOfClasses - 1) / numOfClasses; // segment = blockIdx.x / threadsPerSegment; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); extern __shared__ unsigned char shmem[]; val = reinterpret_cast(shmem); @@ -76,25 +76,25 @@ namespace helpers { template static __global__ void - unsortedSegmentMinLinearKernel(void *input, Nd4jLong *inputShape, void *indices, Nd4jLong *indicesShape, + unsortedSegmentMinLinearKernel(const void *input, const Nd4jLong *inputShape, const void *indices, const Nd4jLong *indicesShape, int *starts, int *lengths, Nd4jLong numOfClasses, void *output, - Nd4jLong *outputShape) { + const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ - T *x; + const T *x; __shared__ T *z; __shared__ - I *y; //int threadsPerSegment, start, finish; + const I *y; //int threadsPerSegment, start, finish; if (threadIdx.x == 0) { segment = blockIdx.x; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); - y = reinterpret_cast(indices); + y = reinterpret_cast(indices); xLen = shape::length(inputShape); zLen = shape::length(outputShape); @@ -106,6 +106,7 @@ namespace helpers { } __syncthreads(); + if (lengths[segment] > 0) for (auto e = threadIdx.x + 1; e < xLen; e += blockDim.x) { auto xIndex = shape::getIndexOffset(e, inputShape); @@ -118,7 +119,7 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentMin kernel template - static __global__ void segmentMinTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* 
outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentMinTadKernel(const void* inputBuf, const Nd4jLong* inputShape, const Nd4jLong* inputTads, const Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, const Nd4jLong* outputShape, const Nd4jLong* outputTads, const Nd4jLong* outputTadOffsets) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; __shared__ T* z; @@ -137,7 +138,7 @@ namespace helpers { auto idx = blockIdx.x; if (blockIdx.x <= total) { - auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; + auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; if (blockIdx.x == start) { for (auto e = threadIdx.x; e < len; e += blockDim.x) { auto xIndex = shape::getIndexOffset(e, inputTads); @@ -161,8 +162,8 @@ namespace helpers { static void segmentMinFunctor_(LaunchContext* context, NDArray* input, NDArray* indices, NDArray* output) { auto stream = context->getCudaStream(); Nd4jLong numClasses = indices->e(indices->lengthOf() - 1) + 1; - NDArray classesRangesLens = NDArrayFactory::create('c', {numClasses}, context); - NDArray classesRangesBegs = NDArrayFactory::create('c', {numClasses}, context); + auto classesRangesLens = NDArrayFactory::create('c', {numClasses}, context); + auto classesRangesBegs = NDArrayFactory::create('c', {numClasses}, context); output->assign(DataTypeUtils::infOrMax()); classesRangesBegs.assign(indices->lengthOf()); classesRangesLens.assign(0); @@ -176,12 +177,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = 
packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentMinTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -221,12 +222,12 @@ namespace helpers { else { output->assign(DataTypeUtils::max()); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentMinTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), 
output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -243,20 +244,20 @@ namespace helpers { } template - static __global__ void segmentMinBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape) { - __shared__ T* x; + static __global__ void segmentMinBPLinearKernel(const void* inputBuf, const Nd4jLong* inputShape, void* forwardOutput, + const Nd4jLong* forwardShape, void* eps, const Nd4jLong* epsShape, const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape) { + __shared__ const T* x; __shared__ T* gradIn; __shared__ T* gradOut; - __shared__ I* y; + __shared__ const I* y; __shared__ T* z; __shared__ Nd4jLong xLen, gradLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); + x = reinterpret_cast(inputBuf); + y = reinterpret_cast(indicesBuf); z = reinterpret_cast(outputBuf); gradIn = reinterpret_cast(forwardOutput); gradOut = reinterpret_cast(eps); @@ -284,23 +285,25 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMinBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradInTad, Nd4jLong* gradInOffsets, - Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, - Nd4jLong* outOffsets) { - __shared__ T* x; + static __global__ void segmentMinBPTadKernel(const void* inputBuf, const Nd4jLong* inputShape, void* forwardOutput, + const Nd4jLong* forwardShape, void* eps, const Nd4jLong* epsShape, + const void* indicesBuf, const Nd4jLong* 
indicesShape, + void* outputBuf, const Nd4jLong* outputShape, + const Nd4jLong* inputTad, const Nd4jLong* inputOffsets, + const Nd4jLong* gradInTad, const Nd4jLong* gradInOffsets, + const Nd4jLong* gradOutTad, const Nd4jLong* gradOutOffsets, + const Nd4jLong* outTad, const Nd4jLong* outOffsets) { + __shared__ const T* x; __shared__ T* gradIn; __shared__ T* gradOut; - __shared__ I* y; + __shared__ const I* y; __shared__ T* z; __shared__ Nd4jLong xLen, yLen, gradLen, currentLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); + x = reinterpret_cast(inputBuf); + y = reinterpret_cast(indicesBuf); z = reinterpret_cast(outputBuf); yLen = shape::length(indicesShape); gradOut = reinterpret_cast(eps); @@ -313,10 +316,10 @@ namespace helpers { for (auto i = blockIdx.x; i < yLen; i += gridDim.x) { auto yIndex = shape::getIndexOffset(i, indicesShape); auto segment = y[yIndex]; - T* current = x + inputOffsets[i]; - T* currentOut = z + outOffsets[i]; - T* in = gradIn + gradInOffsets[segment]; - T* outGrad = gradOut + gradOutOffsets[segment]; + auto current = x + inputOffsets[i]; + auto currentOut = z + outOffsets[i]; + auto in = gradIn + gradInOffsets[segment]; + auto outGrad = gradOut + gradOutOffsets[segment]; for (auto e = threadIdx.x; e < currentLen; e += blockDim.x) { if (sd::math::nd4j_abs(in[e] - current[e]) <= T(1.e-6)) @@ -344,18 +347,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* 
inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentMinBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -392,18 +395,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentMinBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu index d08f79817..026ded3e7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu @@ -35,8 +35,8 @@ namespace helpers { // 
-------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentProdLinearKernel(void* input, Nd4jLong* inputShape, int* starts, int* lengths, - Nd4jLong numOfClasses, void* output, Nd4jLong* outputShape) { + static __global__ void segmentProdLinearKernel(void* input, Nd4jLong const* inputShape, int* starts, int* lengths, + Nd4jLong numOfClasses, void* output, Nd4jLong const* outputShape) { __shared__ Nd4jLong xLen, zLen; __shared__ T* x; @@ -66,7 +66,7 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentProdLinearKernel(T* input, Nd4jLong* inputShape, I* indices, Nd4jLong* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong* outputShape) { + static __global__ void unsortedSegmentProdLinearKernel(T* input, Nd4jLong const* inputShape, I* indices, Nd4jLong const* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong const* outputShape) { __shared__ Nd4jLong xLen, zLen; if (threadIdx.x == 0) { @@ -90,9 +90,9 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentProd kernel template - static __global__ void segmentProdTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, - Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, - Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentProdTadKernel(void* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, + Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, + Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets) { 
__shared__ Nd4jLong len, total; @@ -138,12 +138,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentProdTadKernel<<<128, 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -181,12 +181,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentProdTadKernel<<<128, 256, 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -202,9 +202,9 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentProdBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentProdBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -240,12 +240,12 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentProdBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradInTad, Nd4jLong* gradInOffsets, - Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, - Nd4jLong* outOffsets) { + static __global__ void 
segmentProdBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape, Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradInTad, Nd4jLong const* gradInOffsets, + Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, + Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -278,7 +278,6 @@ namespace helpers { currentOut[e] = outGrad[e] * in[e] / current[e]; } } - } // -------------------------------------------------------------------------------------------------------------- // @@ -297,18 +296,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto 
packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentProdBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -347,18 +346,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentProdBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu index f9b6eaad0..b72abeffc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu @@ -32,7 +32,7 @@ namespace ops { namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentSqrtNLinearKernel(T* input, Nd4jLong* inputShape, I* indices, Nd4jLong* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong* outputShape) { + static __global__ void unsortedSegmentSqrtNLinearKernel(T* input, Nd4jLong const* inputShape, I* indices, Nd4jLong const* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong const* outputShape) { __shared__ Nd4jLong xLen, zLen; if (threadIdx.x == 0) { @@ -57,7 +57,7 @@ namespace helpers { // 
-------------------------------------------------------------------------------------------------------------- // // SegmentSqrtN kernel template - static __global__ void segmentSqrtNTadKernel(T* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentSqrtNTadKernel(T* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets) { __shared__ Nd4jLong len, total; @@ -108,12 +108,12 @@ namespace helpers { else { output->nullify(); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentSqrtNTadKernel<<>>( input->dataBuffer()->specialAsT(), input->specialShapeInfo(), inputTads, inputTadOffsets, indices->dataBuffer()->specialAsT(), @@ -129,8 +129,8 @@ namespace helpers { } // 
-------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentSqrtNBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - int* lengths, void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentSqrtNBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + int* lengths, void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -165,9 +165,9 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentSqrtNBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, int* lengths, void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, Nd4jLong* outOffsets) { + static __global__ void segmentSqrtNBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, + void* indicesBuf, Nd4jLong const* indicesShape, int* lengths, void* outputBuf, Nd4jLong const* outputShape,Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradOut; __shared__ I* y; @@ -226,16 +226,16 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); -// auto packGradIn = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); +// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentSqrtNBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), lengths, diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu index 56d53710f..7a762a526 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu @@ -35,14 +35,16 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template static __global__ void - 
segmentSumLinearKernel(void *input, Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, - void *output, Nd4jLong *outputShape) { + segmentSumLinearKernel( + const void *input, const Nd4jLong *inputShape, + int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ - T *x; + const T *x; __shared__ T *z; __shared__ int threadsPerSegment, start, finish; @@ -50,7 +52,7 @@ namespace helpers { if (threadIdx.x == 0) { threadsPerSegment = (gridDim.x + numOfClasses - 1) / numOfClasses; segment = blockIdx.x / threadsPerSegment; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); xLen = shape::length(inputShape); @@ -77,25 +79,27 @@ namespace helpers { template static __global__ void - unsortedSegmentSumLinearKernel(void *input, Nd4jLong *inputShape, void *indices, Nd4jLong *indicesShape, - int *starts, int *lengths, Nd4jLong numOfClasses, void *output, - Nd4jLong *outputShape) { + unsortedSegmentSumLinearKernel( + const void *input, const Nd4jLong *inputShape, + const void *indices, const Nd4jLong *indicesShape, + int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ - T *x; + const T *x; __shared__ T *z; __shared__ - I *y; //int threadsPerSegment, start, finish; + const I *y; //int threadsPerSegment, start, finish; if (threadIdx.x == 0) { segment = blockIdx.x; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); - y = reinterpret_cast(indices); + y = reinterpret_cast(indices); xLen = shape::length(inputShape); zLen = shape::length(outputShape); @@ -119,7 +123,11 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentSum kernel template - static 
__global__ void segmentSumTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentSumTadKernel( + const void* inputBuf, const Nd4jLong* inputShape, const Nd4jLong* inputTads, const Nd4jLong* inputTadOffsets, + const I* indices, + int* starts, int* lengths, Nd4jLong numOfClasses, + void* outputBuf, const Nd4jLong* outputShape, const Nd4jLong* outputTads, const Nd4jLong* outputTadOffsets) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; __shared__ T* z; @@ -138,7 +146,7 @@ namespace helpers { auto idx = blockIdx.x; if (blockIdx.x <= total) { - auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; + auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; if (blockIdx.x == start) { for (auto e = threadIdx.x; e < len; e += blockDim.x) { auto xIndex = shape::getIndexOffset(e, inputTads); @@ -178,12 +186,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); 
segmentSumTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -219,12 +227,12 @@ namespace helpers { else { output->assign(0); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentSumTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -245,21 +253,19 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // Sorted sum backpropagate template - static __global__ void segmentSumBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, void* outputBuf, Nd4jLong* outputShape) { - __shared__ T* x; - __shared__ T* gradIn; - 
__shared__ T* gradOut; - __shared__ I* y; - __shared__ T* z; + static __global__ void segmentSumBPLinearKernel( + const void* inputBuf, const Nd4jLong* inputShape, + const void* eps, const Nd4jLong* epsShape, + const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape) { + auto x = reinterpret_cast(inputBuf); + auto y = reinterpret_cast(indicesBuf); + auto z = reinterpret_cast(outputBuf); + auto gradOut = reinterpret_cast(eps); __shared__ Nd4jLong xLen, gradLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); - z = reinterpret_cast(outputBuf); - gradOut = reinterpret_cast(eps); gradLen = shape::length(epsShape); } __syncthreads(); @@ -280,22 +286,27 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentSumBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, Nd4jLong* outOffsets) { - __shared__ T* x; - __shared__ T* gradOut; - __shared__ I* y; + static __global__ void segmentSumBPTadKernel( + const void* inputBuf, const Nd4jLong* inputShape, + const void* eps, const Nd4jLong* epsShape, + const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape, + const Nd4jLong* inputTad, const Nd4jLong* inputOffsets, + const Nd4jLong* gradOutTad, const Nd4jLong* gradOutOffsets, + const Nd4jLong* outTad, const Nd4jLong* outOffsets) { + __shared__ const T* x; + __shared__ const T* gradOut; + __shared__ const I* y; __shared__ T* z; __shared__ Nd4jLong xLen, yLen, gradLen, currentLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); 
- y = reinterpret_cast(indicesBuf); + x = reinterpret_cast(inputBuf); + y = reinterpret_cast(indicesBuf); z = reinterpret_cast(outputBuf); yLen = shape::length(indicesShape); - gradOut = reinterpret_cast(eps); + gradOut = reinterpret_cast(eps); gradLen = shape::length(epsShape); currentLen = shape::length(outTad); } @@ -304,8 +315,8 @@ namespace helpers { for (auto i = blockIdx.x; i < yLen; i += gridDim.x) { auto yIndex = shape::getIndexOffset(i, indicesShape); auto segment = y[yIndex]; - T* currentOut = z + outOffsets[i]; - T* outGrad = gradOut + gradOutOffsets[segment]; + auto currentOut = z + outOffsets[i]; + auto outGrad = gradOut + gradOutOffsets[segment]; for (auto e = threadIdx.x; e < currentLen; e += blockDim.x) { currentOut[e] = outGrad[e]; @@ -327,15 +338,15 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = 
packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentSumBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -368,15 +379,15 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentSumBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git 
a/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu b/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu index b06797753..51b7590c0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu @@ -25,13 +25,13 @@ namespace ops { namespace helpers { template - static __global__ void sequenceMaskKernel(void* inputBuf, Nd4jLong* inputShape, void* outputBuf, Nd4jLong* outputShape, int maxIndex) { + static __global__ void sequenceMaskKernel(const void* inputBuf, const Nd4jLong* inputShape, void* outputBuf, const Nd4jLong* outputShape, int maxIndex) { - __shared__ I* input; + __shared__ const I* input; __shared__ B* output; __shared__ Nd4jLong inputLen, outputLen; if (threadIdx.x == 0) { - input = reinterpret_cast(inputBuf); + input = reinterpret_cast(inputBuf); output = reinterpret_cast(outputBuf); inputLen = shape::length(inputShape); outputLen = shape::length(outputShape); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu index f85a855b7..3957f23d5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu @@ -289,7 +289,7 @@ namespace sd { if (irow < 0 || irow >= vocabSize) continue; - auto syn1row = reinterpret_cast(s1.getSpecialBuffer()) + (irow * vectorLength); + auto syn1row = reinterpret_cast(s1.specialBuffer()) + (irow * vectorLength); auto code = bCodes[e + cShift]; //nd4j_printf("syn0: [%i]; syn1: [%i]; code: [%i]\n", target, irow, code); @@ -315,7 +315,7 @@ namespace sd { if (irow == nsStarter) continue; } - auto syn1row = reinterpret_cast(s1n.getSpecialBuffer()) + (irow * vectorLength); + auto syn1row = reinterpret_cast(s1n.specialBuffer()) + (irow * vectorLength); nSampling_(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, r == 0 ? 
1 : 0, expLength, false, stream); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu index 74823483e..cf8308bbe 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu @@ -34,7 +34,7 @@ namespace sd { namespace helpers { template - static __global__ void oneOnDiagonalKernel(T* ioBuf, Nd4jLong* ioShape, Nd4jLong* tadShape, Nd4jLong* tadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { + static __global__ void oneOnDiagonalKernel(T* ioBuf, Nd4jLong const* ioShape, Nd4jLong const* tadShape, Nd4jLong const* tadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { for (auto i = blockIdx.x; i < batchNum; i += gridDim.x) { auto matrixPart = ioBuf + tadOffsets[i]; for (auto j = threadIdx.x; j < rowNum; j += blockDim.x) { @@ -47,8 +47,8 @@ namespace sd { } template - static __global__ void restorePermutationsKernel(T* PBuf, Nd4jLong* PShapeInfo, int const* permutationsBuf, - Nd4jLong* PTadShapeInfo, Nd4jLong* PTadSOffsets, Nd4jLong* permutationsTadShapeInfo, Nd4jLong* permutationsTadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { + static __global__ void restorePermutationsKernel(T* PBuf, Nd4jLong const* PShapeInfo, int const* permutationsBuf, + Nd4jLong const* PTadShapeInfo, Nd4jLong const* PTadSOffsets, Nd4jLong const* permutationsTadShapeInfo, Nd4jLong const* permutationsTadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { for (auto batch = blockIdx.x; batch < batchNum; batch += gridDim.x) { auto permutations = permutationsBuf + permutationsTadOffsets[batch]; auto P = PBuf + PTadSOffsets[batch]; @@ -73,12 +73,12 @@ namespace sd { helpers::lu(context, leftInput, &leftOutput, &permutations); auto leftLower = leftOutput.dup(); auto rightOutput = rightInput->ulike(); - auto leftLowerTad = ConstantTadHelper::getInstance()->tadForDimensions(leftLower.getShapeInfo(), {-2, -1}); + auto leftLowerTad = 
ConstantTadHelper::getInstance()->tadForDimensions(leftLower.shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); oneOnDiagonalKernel<<<128, 256, 256, *stream>>>(leftLower.dataBuffer()->specialAsT(), leftLower.specialShapeInfo(), leftLowerTad.specialShapeInfo(), leftLowerTad.specialOffsets(), leftLowerTad.numberOfTads(), leftLower.sizeAt(-1)); auto P = leftOutput.ulike(); P.nullify(); - auto PTad = ConstantTadHelper::getInstance()->tadForDimensions(P.getShapeInfo(), {-2, -1}); - auto permutationsTad = ConstantTadHelper::getInstance()->tadForDimensions(permutations.getShapeInfo(), {-1}); + auto PTad = ConstantTadHelper::getInstance()->tadForDimensions(P.shapeInfo(), {-2, -1}); + auto permutationsTad = ConstantTadHelper::getInstance()->tadForDimensions(permutations.shapeInfo(), {-1}); restorePermutationsKernel<<<128, 256, 256, *stream>>>(P.dataBuffer()->specialAsT(), P.specialShapeInfo(), permutations.dataBuffer()->specialAsT(), PTad.specialShapeInfo(), PTad.specialOffsets(), permutationsTad.specialShapeInfo(), permutationsTad.specialOffsets(), permutationsTad.numberOfTads(), permutations.sizeAt(-1)); P.tickWriteDevice(); @@ -99,8 +99,8 @@ namespace sd { } template - static __global__ void adjointKernel(T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, Nd4jLong* outputTads, - Nd4jLong* outputOffsets) { + static __global__ void adjointKernel(T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, Nd4jLong const* outputTads, + Nd4jLong const* outputOffsets) { for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { auto outputPart = output + outputOffsets[b]; @@ -120,8 +120,8 @@ namespace sd { template static void adjointMatrix_(sd::LaunchContext* context, NDArray const* input, NDArray* output) { NDArray::prepareSpecialUse({output}, {input}); - auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {-2, -1}); - auto outputTads = 
ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {-2, -1}); + auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1}); + auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); auto outputBuf = reinterpret_cast(output->specialBuffer()); auto rows = input->sizeAt(-2); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/split.cu b/libnd4j/include/ops/declarable/helpers/cuda/split.cu index 5690d786c..19c58b89e 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/split.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/split.cu @@ -103,12 +103,12 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vector(input.getSpecialBuffer()); + auto x = static_cast(input.specialBuffer()); for (uint i = 0; i < numOfSubArrs; ++i) { const auto memAmountToCopy = outArrs[i]->lengthOf() * sizeofT; - cudaMemcpyAsync(static_cast(outArrs[i]->getSpecialBuffer()), x, memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); - x = static_cast(x) + memAmountToCopy; + cudaMemcpyAsync(static_cast(outArrs[i]->specialBuffer()), x, memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); + x = static_cast(x) + memAmountToCopy; } if(cudaStreamSynchronize(*context->getCudaStream()) != 0) @@ -135,7 +135,7 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vectorgetShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, outArrs[i]->shapeInfo()); // } // } @@ -143,16 +143,16 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vectorsizeAt(axis); // same for all outArrs // for (uint i = 0; i < input.lengthOf() / input.sizeAt(axis); ++i) { // const auto iShift = i * sizeofT; - // void* x = static_cast(input.getSpecialBuffer()) + xStep * iShift; + // void* x = static_cast(input.specialBuffer()) + xStep * iShift; // for (uint j = 0; j < 
numOfSubArrs; ++j) { - // void* z = static_cast(outArrs[j]->getSpecialBuffer()) + strideOfContigStride[j] * iShift; + // void* z = static_cast(outArrs[j]->specialBuffer()) + strideOfContigStride[j] * iShift; // const auto memSizeToCopy = zDim * sizeofT; // cudaMemcpyAsync(z, x, memSizeToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); // x = static_cast(x) + memSizeToCopy; @@ -171,13 +171,13 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vector hOutBuffers(numOfSubArrs); for(int i = 0; i < numOfSubArrs; ++i) - hOutBuffers[i] = outArrs[i]->getSpecialBuffer(); + hOutBuffers[i] = outArrs[i]->specialBuffer(); PointersManager manager(context, "helpers::split"); void* dOutBuffers = manager.replicatePointer(hOutBuffers.data(), hOutBuffers.size() * sizeof(void*)); - BUILD_SINGLE_SELECTOR(input.dataType(), splitCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), splitCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES); manager.synchronize(); // } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sru.cu b/libnd4j/include/ops/declarable/helpers/cuda/sru.cu index 518525ecf..b59ac0052 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sru.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sru.cu @@ -248,7 +248,7 @@ void sruBI(sd::LaunchContext * context, NDArray* x, const NDArray* w, const NDAr const int sharedMem = threadsPerBlock * sizeof(int) * x->rankOf() + 128; NDArray::prepareSpecialUse({ht, ct}, {x, &wi, b, c0, mask}); - BUILD_SINGLE_SELECTOR(x->dataType(), sruBICudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), 
wi.getSpecialBuffer(), wi.getSpecialShapeInfo(), b->getSpecialBuffer(), b->getSpecialShapeInfo(), c0->getSpecialBuffer(), c0->getSpecialShapeInfo(), mask ? mask->getSpecialBuffer() : nullptr, mask ? mask->getSpecialShapeInfo() : nullptr, ht->specialBuffer(), ht->specialShapeInfo(), ct->specialBuffer(), ct->specialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), sruBICudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->specialBuffer(), x->specialShapeInfo(), wi.specialBuffer(), wi.specialShapeInfo(), b->specialBuffer(), b->specialShapeInfo(), c0->specialBuffer(), c0->specialShapeInfo(), mask ? mask->specialBuffer() : nullptr, mask ? mask->specialShapeInfo() : nullptr, ht->specialBuffer(), ht->specialShapeInfo(), ct->specialBuffer(), ct->specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({ht, ct}, {x, &wi, b, c0, mask}); manager.synchronize(); @@ -516,7 +516,7 @@ void sruBIBP(sd::LaunchContext* context, NDArray* x, const NDArray* w, const NDA const int sharedMem = threadsPerBlock * sizeof(int) * x->rankOf() + 128; NDArray::prepareSpecialUse({gradI, &gradWi, &gradBias, gradC0}, {x, &wi, b, c0, ct, gradCt, gradHt, mask}); - BUILD_SINGLE_SELECTOR(x->dataType(), sruBIBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), wi.getSpecialBuffer(), wi.getSpecialShapeInfo(), b->getSpecialBuffer(), b->getSpecialShapeInfo(), c0->getSpecialBuffer(), c0->getSpecialShapeInfo(), mask ? mask->getSpecialBuffer() : nullptr, mask ? 
mask->getSpecialShapeInfo() : nullptr, ct->getSpecialBuffer(), ct->getSpecialShapeInfo(), gradHt->getSpecialBuffer(), gradHt->getSpecialShapeInfo(), gradCt->getSpecialBuffer(), gradCt->getSpecialShapeInfo(), gradI->specialBuffer(), gradI->specialShapeInfo(), gradWi.specialBuffer(), gradWi.specialShapeInfo(), gradBias.specialBuffer(), gradBias.specialShapeInfo(), gradC0->specialBuffer(), gradC0->specialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), sruBIBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->specialBuffer(), x->specialShapeInfo(), wi.specialBuffer(), wi.specialShapeInfo(), b->specialBuffer(), b->specialShapeInfo(), c0->specialBuffer(), c0->specialShapeInfo(), mask ? mask->specialBuffer() : nullptr, mask ? mask->specialShapeInfo() : nullptr, ct->specialBuffer(), ct->specialShapeInfo(), gradHt->specialBuffer(), gradHt->specialShapeInfo(), gradCt->specialBuffer(), gradCt->specialShapeInfo(), gradI->specialBuffer(), gradI->specialShapeInfo(), gradWi.specialBuffer(), gradWi.specialShapeInfo(), gradBias.specialBuffer(), gradBias.specialShapeInfo(), gradC0->specialBuffer(), gradC0->specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({gradI, &gradWi, &gradBias, gradC0}, {x, &wi, b, c0, ct, gradCt, gradHt, mask}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu index 89859ae1d..f0983b76c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu @@ -73,10 +73,10 @@ static void stack_(sd::LaunchContext* context, const std::vector if(inArrs[0]->rankOf() == 0) { - std::vector hInBuffers(numOfSubArrs); + std::vector hInBuffers(numOfSubArrs); for(int i = 0; i < numOfSubArrs; ++i) - hInBuffers[i] = inArrs[i]->getSpecialBuffer(); + hInBuffers[i] = inArrs[i]->specialBuffer(); PointersManager manager(context, "helpers::stack cuda"); @@ -91,15 
+91,15 @@ static void stack_(sd::LaunchContext* context, const std::vector } else { - auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); - Nd4jLong* zTadShapeInfo = zTadPack.primaryShapeInfo(); + auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); + auto zTadShapeInfo = zTadPack.primaryShapeInfo(); for (uint i = 0; i < numOfSubArrs; ++i) { void* zBuff = output.specialBufferWithOffset(zTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(context, transform::Assign, - nullptr, inArrs[i]->getShapeInfo(), inArrs[i]->getSpecialBuffer(), inArrs[i]->getSpecialShapeInfo(), + nullptr, inArrs[i]->shapeInfo(), inArrs[i]->specialBuffer(), inArrs[i]->specialShapeInfo(), nullptr, zTadShapeInfo, zBuff, zTadPack.specialShapeInfo(), nullptr, nullptr, nullptr, false/*allowParallelism*/); } @@ -164,7 +164,7 @@ static void unstack_(sd::LaunchContext* context, const NDArray& input, const std std::vector hOutBuffers(numOfSubArrs); for(int i = 0; i < numOfSubArrs; ++i) - hOutBuffers[i] = outArrs[i]->getSpecialBuffer(); + hOutBuffers[i] = outArrs[i]->specialBuffer(); PointersManager manager(context, "helpers::unstack cuda"); @@ -173,22 +173,22 @@ static void unstack_(sd::LaunchContext* context, const NDArray& input, const std const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (input.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - unstackScalarsCudaLauncher(blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), dOutBuffers); + unstackScalarsCudaLauncher(blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers); manager.synchronize(); } else { - auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), 
ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); - Nd4jLong* xTadShapeInfo = xTadPack.primaryShapeInfo(); + auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); + auto xTadShapeInfo = xTadPack.primaryShapeInfo(); for (uint i = 0; i < numOfSubArrs; ++i) { - void* xBuff = input.specialBufferWithOffset(xTadPack.primaryOffsets()[i]); + auto xBuff = input.specialBufferWithOffset(xTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input.getContext(), transform::Assign, nullptr, xTadShapeInfo, xBuff, xTadPack.specialShapeInfo(), - nullptr, outArrs[i]->getShapeInfo(), outArrs[i]->specialBuffer(), outArrs[i]->specialShapeInfo(), + nullptr, outArrs[i]->shapeInfo(), outArrs[i]->specialBuffer(), outArrs[i]->specialShapeInfo(), nullptr, nullptr, nullptr, false/*allowParallelism*/); } } @@ -262,7 +262,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const // std::vector hOutBuffers(numOfSubArrs); // for(int i = 0; i < numOfSubArrs; ++i) -// hOutBuffers[i] = outArrs[i]->getSpecialBuffer(); +// hOutBuffers[i] = outArrs[i]->specialBuffer(); // PointersManager manager(context, "helpers::unstack"); @@ -272,7 +272,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const // outArrs[i]->syncToDevice(); // input.syncToDevice(); -// BUILD_SINGLE_SELECTOR(input.dataType(), unstackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), dOutBuffers, outArrs[0]->getSpecialShapeInfo(), axis), LIBND4J_TYPES); +// BUILD_SINGLE_SELECTOR(input.dataType(), unstackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES); // manager.synchronize(); @@ -340,7 +340,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* 
context, const // std::vector hInBuffers(numOfSubArrs); // for(int i = 0; i < numOfSubArrs; ++i) -// hInBuffers[i] = inArrs[i]->getSpecialBuffer(); +// hInBuffers[i] = inArrs[i]->specialBuffer(); // PointersManager manager(context, "helpers::stack"); @@ -350,7 +350,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const // inArrs[i]->syncToDevice(); // output.syncToDevice(); -// BUILD_SINGLE_SELECTOR(output.dataType(), stackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), dInBuffers, inArrs[0]->getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), axis), LIBND4J_TYPES); +// BUILD_SINGLE_SELECTOR(output.dataType(), stackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), dInBuffers, inArrs[0]->specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), axis), LIBND4J_TYPES); // manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/svd.cu b/libnd4j/include/ops/declarable/helpers/cuda/svd.cu index 5c3d2811c..33dd0251a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/svd.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/svd.cu @@ -229,10 +229,10 @@ static void svdQR(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDAr // choose appropriate cuda gemm api depending on data types if(A->dataType() == DataType::DOUBLE) { - status = cusolverDnDgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pVT->getSpecialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); + status = cusolverDnDgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? 
reinterpret_cast(pVT->specialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); } else if(A->dataType() == DataType::FLOAT32) { - status = cusolverDnSgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pVT->getSpecialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); + status = cusolverDnSgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pVT->specialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); } else throw std::invalid_argument("svdQR: given data type is unsupported !"); @@ -386,7 +386,7 @@ static void svdJcb(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDA if(!calcUV && m != n) { int maxDim = m > n ? m : n; arrToAvoidBugInAPI = new NDArray('c', {maxDim, maxDim}, pA->dataType(), context); - nullPtr = arrToAvoidBugInAPI->getSpecialBuffer(); + nullPtr = arrToAvoidBugInAPI->specialBuffer(); } // ****************** @@ -395,9 +395,9 @@ static void svdJcb(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDA // query working space of SVD int lwork = 0; if(A->dataType() == DataType::DOUBLE) - status = cusolverDnDgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); + status = cusolverDnDgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? 
reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); else if(A->dataType() == DataType::FLOAT32) - status = cusolverDnSgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); + status = cusolverDnSgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); else throw std::invalid_argument("svdJcb: given data type is unsupported !"); @@ -414,10 +414,10 @@ static void svdJcb(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDA // choose appropriate cuda gemm api depending on data types if(A->dataType() == DataType::DOUBLE) { - status = cusolverDnDgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); + status = cusolverDnDgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? 
reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); } else if(A->dataType() == DataType::FLOAT32) { - status = cusolverDnSgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); + status = cusolverDnSgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); } else throw std::invalid_argument("svdJcb: given data type is unsupported !"); @@ -570,9 +570,9 @@ static void svdBatched(sd::LaunchContext* context, const NDArray* A, NDArray* S, // query working space of SVD int lwork = 0; if(A->dataType() == DataType::DOUBLE) - status = cusolverDnDgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); + status = cusolverDnDgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); else if(A->dataType() == DataType::FLOAT32) - status = cusolverDnSgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? 
reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); + status = cusolverDnSgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); else throw std::invalid_argument("svdBatched: given data type is unsupported !"); @@ -594,10 +594,10 @@ static void svdBatched(sd::LaunchContext* context, const NDArray* A, NDArray* S, // choose appropriate cuda gemm api depending on data types if(A->dataType() == DataType::DOUBLE) { - status = cusolverDnDgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); + status = cusolverDnDgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); } else if(A->dataType() == DataType::FLOAT32) { - status = cusolverDnSgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); + status = cusolverDnSgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? 
reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); } else throw std::invalid_argument("svdBatched: given data type is unsupported !"); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu index b344f570e..ce19d41cc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu @@ -91,7 +91,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const PointersManager manager(context, "in_top_k"); - const auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(predictions->getShapeInfo(), {1}); + const auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(predictions->shapeInfo(), {1}); const int threadsPerBlock = MAX_NUM_THREADS; const int blocksPerGrid = static_cast(packX.numberOfTads()); @@ -101,7 +101,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const const auto yType = targets->dataType(); NDArray::prepareSpecialUse({output}, {predictions, targets}); - BUILD_DOUBLE_SELECTOR(xType, yType, inTopKCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), predictions->getSpecialBuffer(), predictions->getSpecialShapeInfo(), targets->getSpecialBuffer(), targets->getSpecialShapeInfo(), output->getSpecialBuffer(), output->getSpecialShapeInfo(), packX.specialShapeInfo(), packX.specialOffsets(), k), FLOAT_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, inTopKCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), predictions->specialBuffer(), predictions->specialShapeInfo(), targets->specialBuffer(), targets->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), packX.specialShapeInfo(), packX.specialOffsets(), k), FLOAT_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({output}, {predictions, targets}); 
manager.synchronize(); @@ -110,10 +110,10 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const } template - static _CUDA_G void topValuesMover(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vi, Nd4jLong *iTadShapeInfo, Nd4jLong *iTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong tadLength, int numTads, int k) { + static _CUDA_G void topValuesMover(void const* vx, Nd4jLong const* xTadShapeInfo, Nd4jLong const* xTadOffsets, void const* vi, Nd4jLong const* iTadShapeInfo, Nd4jLong const* iTadOffsets, void *vz, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets, Nd4jLong tadLength, int numTads, int k) { for (int t = blockIdx.x; t < numTads; t += gridDim.x) { - auto x = reinterpret_cast(vx) + xTadOffsets[t]; - auto i = reinterpret_cast(vi) + iTadOffsets[t]; + auto x = reinterpret_cast(vx) + xTadOffsets[t]; + auto i = reinterpret_cast(vi) + iTadOffsets[t]; auto z = reinterpret_cast(vz) + zTadOffsets[t]; for (int e = threadIdx.x; e < k; e += blockDim.x) { @@ -126,7 +126,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const template - static _CUDA_G void indicesAlongDimension(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vi, Nd4jLong *iTadShapeInfo, Nd4jLong *iTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong tadLength, int numTads, int k, int scanWidth, bool needSort) { + static _CUDA_G void indicesAlongDimension(void const* vx, Nd4jLong const* xTadShapeInfo, Nd4jLong const* xTadOffsets, void* vi, Nd4jLong const* iTadShapeInfo, Nd4jLong const* iTadOffsets, void *vz, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets, Nd4jLong tadLength, int numTads, int k, int scanWidth, bool needSort) { extern __shared__ char _shmem[]; X* tempValues = reinterpret_cast(_shmem) + threadIdx.x * scanWidth; @@ -138,8 +138,8 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const __syncthreads(); 
for (int t = blockIdx.x; t < numTads; t += gridDim.x) { - auto x = reinterpret_cast(vx) + xTadOffsets[t]; - auto i = reinterpret_cast(vi) + iTadOffsets[t]; + auto x = reinterpret_cast(vx) + xTadOffsets[t]; + auto i = reinterpret_cast(vi) + iTadOffsets[t]; auto z = reinterpret_cast(vz) + zTadOffsets[t]; // we'll do multiple reads here @@ -243,7 +243,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const template static int topKFunctor_(sd::LaunchContext * context, const NDArray* input, NDArray* values, NDArray* indices, const uint k, bool needSort) { - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 1}); auto packI = ConstantTadHelper::getInstance()->tadForDimensions(indices->shapeInfo(), {input->rankOf() - 1}); auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(values->shapeInfo(), {input->rankOf() - 1}); @@ -254,13 +254,13 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const input->applyIndexReduce(indexreduce::IndexMax, *indices, {input->rankOf() - 1}); // copy values on specified indices - topValuesMover<<<256, 256, 1024, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k); + topValuesMover<<<256, 256, 1024, *context->getCudaStream()>>>(input->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k); } else { int scanWidth = 1; int numTreads = 256; int shMemSize = (numTreads * 
sizeof(X) * scanWidth) + (numTreads * sizeof(Y) * scanWidth) + 512; - indicesAlongDimension<<<256, numTreads, shMemSize, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k, scanWidth, needSort); + indicesAlongDimension<<<256, numTreads, shMemSize, *context->getCudaStream()>>>(input->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k, scanWidth, needSort); } return Status::OK(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu index b4dcfb2f6..f016491a6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu @@ -79,7 +79,7 @@ void invertPermutation(sd::LaunchContext* context, const NDArray& input, NDArray PointersManager manager(context, "invertPermutation"); NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), invertPermutationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), invertPermutationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); @@ -163,7 +163,7 @@ void trace(sd::LaunchContext* context, const NDArray& input, 
NDArray& output) { const int sharedMem = threadsPerBlock * (sizeof(int) * input.rankOf() + input.sizeOfT()) + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), traceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), diagLen), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), traceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), diagLen), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); @@ -226,7 +226,7 @@ void triuBP(sd::LaunchContext* context, const NDArray& input, const NDArray& gra PointersManager manager(context, "triuBP"); NDArray::prepareSpecialUse({&gradI}, {&gradO}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), triuBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), diagonal), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), triuBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), diagonal), LIBND4J_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO}); manager.synchronize(); @@ -294,7 +294,7 @@ void tileBP(sd::LaunchContext * context, const NDArray& gradO /*input*/, NDArray PointersManager manager(context, "tileBP"); NDArray::prepareSpecialUse({&gradI}, {&gradO, &memBuff}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), tileBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), 
reinterpret_cast(memBuff.specialBuffer())), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), tileBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), reinterpret_cast(memBuff.specialBuffer())), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO, &memBuff}); manager.synchronize(); @@ -546,16 +546,16 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra if(dimensions.empty() || dimensions.size() == input.rankOf()) { // means whole array const int blocksPerGrid = (input.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), nullptr, gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), nullptr, gradI.getSpecialBuffer(), gradI.getSpecialShapeInfo(), nullptr, context->getReductionPointer(), clipNormVal), FLOAT_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), nullptr, gradO.specialBuffer(), gradO.specialShapeInfo(), nullptr, gradI.specialBuffer(), gradI.specialShapeInfo(), nullptr, context->getReductionPointer(), clipNormVal), FLOAT_TYPES, FLOAT_TYPES); } else { // means tads using - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(gradO.getShapeInfo(), dimensions); - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.getShapeInfo(), dimensions); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(gradO.shapeInfo(), 
dimensions); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), dimensions); const int blocksPerGrid = packX.numberOfTads(); - BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradO.getSpecialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), gradI.getSpecialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), nullptr, clipNormVal), FLOAT_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradO.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), gradI.specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), nullptr, clipNormVal), FLOAT_TYPES, FLOAT_TYPES); } NDArray::registerSpecialUse({&gradI}, {&input, &gradO}); @@ -564,7 +564,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra } template - static __global__ void swapShuffleKernel(T* input, Nd4jLong* shape, Nd4jLong firstDim, sd::graph::RandomGenerator* rng) { + static __global__ void swapShuffleKernel(T* input, Nd4jLong const* shape, Nd4jLong firstDim, sd::graph::RandomGenerator* rng) { auto tid = blockIdx.x * blockDim.x; auto step = blockDim.x * gridDim.x; @@ -582,7 +582,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra } } template - static __global__ void fillShuffleKernel(T* input, Nd4jLong* inputShape, T* output, Nd4jLong* outputShape, Nd4jLong firstDim, int* indices, sd::graph::RandomGenerator* rng) { + static __global__ void fillShuffleKernel(T* input, Nd4jLong const* inputShape, T* output, Nd4jLong const* outputShape, Nd4jLong firstDim, int* indices, sd::graph::RandomGenerator* rng) { // 
PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold()) auto tid = blockIdx.x * blockDim.x; @@ -613,7 +613,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra if(!isInplace) output.assign(input); } - else if (input.isVector() || shape::isLikeVector(input.getShapeInfo(), temp)) { + else if (input.isVector() || shape::isLikeVector(input.shapeInfo(), temp)) { // apply Fisher-Yates shuffle sd::graph::RandomGenerator* dRandom = nullptr; @@ -694,7 +694,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __global__ void clipByNormInplaceKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong* shape, Nd4jLong* inputOffsets, T* norm2Buf, Nd4jLong* norm2shape, T clipNorm) { + static __global__ void clipByNormInplaceKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong const* shape, Nd4jLong const* inputOffsets, T* norm2Buf, Nd4jLong const* norm2shape, T clipNorm) { for (int arr = blockIdx.x; arr < numOfSubArrs; arr += gridDim.x) { __shared__ T* z; __shared__ Nd4jLong len; @@ -713,7 +713,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __global__ void clipByNormKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong* shape, Nd4jLong* inputOffsets, T* outputBuffer, Nd4jLong* outputShape, Nd4jLong* outputOffsets, T* norm2Buf, Nd4jLong* norm2shape, T clipNorm) { + static __global__ void clipByNormKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong const* shape, Nd4jLong const* inputOffsets, T* outputBuffer, Nd4jLong const* outputShape, Nd4jLong const* outputOffsets, T* norm2Buf, Nd4jLong const* norm2shape, T clipNorm) { for (Nd4jLong arr = blockIdx.x; arr < numOfSubArrs; arr += 
gridDim.x) { __shared__ T* x, *z; @@ -761,9 +761,9 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra else { std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(rank, dimensions); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - //auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.shapeInfo(), dimsToExclude); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + //auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimsToExclude); T* inputBuffer = reinterpret_cast(input.specialBuffer()); T* norm2buf = reinterpret_cast(norm2.specialBuffer()); @@ -784,9 +784,9 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra else { std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(rank, dimensions); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), dimensions); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.shapeInfo(), dimsToExclude); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimensions); T* inputBuffer = reinterpret_cast(input.specialBuffer()); T* norm2buf = reinterpret_cast(norm2.specialBuffer()); T* outputBuffer = reinterpret_cast(output.specialBuffer()); @@ -891,7 +891,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& 
input, const NDArra else return d1; */ template - static void __global__ clipByValueKernel(void* input, Nd4jLong* inputShape, void* output, Nd4jLong* outputShape, double leftBound, double rightBound) { + static void __global__ clipByValueKernel(void* input, Nd4jLong const* inputShape, void* output, Nd4jLong const* outputShape, double leftBound, double rightBound) { __shared__ T* outputBuf; __shared__ T* inputBuf; __shared__ Nd4jLong length; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu index e34fd11f8..c8f26de6f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu @@ -43,7 +43,7 @@ namespace sd { template static __device__ void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, - bool const adjoint, T* output, Nd4jLong* outputShape, + bool const adjoint, T* output, Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = 0; r < rows; r++) { @@ -84,7 +84,7 @@ namespace sd { template static __device__ void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, bool const adjoint, T* output, - Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { + Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = rows; r > 0; r--) { for (auto j = 0; j < cols; j++) { @@ -109,8 +109,8 @@ namespace sd { template static __global__ void triangularSolveKernel(T const* leftInput, Nd4jLong const* leftPartShape, T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const adjoint, T* output, - Nd4jLong* outputShape, Nd4jLong* tadLeftShape, Nd4jLong* tadLeftOffset, Nd4jLong* tadRightShape, - Nd4jLong* tadRightOffset, Nd4jLong* tadOutputShape, Nd4jLong* tadOutputOffset, Nd4jLong batchNum) { + 
Nd4jLong const* outputShape, Nd4jLong const* tadLeftShape, Nd4jLong const* tadLeftOffset, Nd4jLong const* tadRightShape, + Nd4jLong const* tadRightOffset, Nd4jLong const* tadOutputShape, Nd4jLong const* tadOutputOffset, Nd4jLong batchNum) { __shared__ Nd4jLong rows; __shared__ Nd4jLong cols; @@ -141,16 +141,16 @@ namespace sd { static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { NDArray::prepareSpecialUse({output}, {leftInput, rightInput}); - auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->getShapeInfo(), {-2, -1}); - auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->getShapeInfo(), {-2, -1}); + auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1}); + auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1}); auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); - T const* leftBuf = reinterpret_cast(leftInput->getSpecialBuffer()); - T const* rightBuf = reinterpret_cast(rightInput->getSpecialBuffer()); + T const* leftBuf = reinterpret_cast(leftInput->specialBuffer()); + T const* rightBuf = reinterpret_cast(rightInput->specialBuffer()); T* outputBuf = reinterpret_cast(output->specialBuffer()); - triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->getSpecialShapeInfo(), - rightBuf, rightInput->getSpecialShapeInfo(), lower, adjoint, outputBuf, output->specialShapeInfo(), + triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->specialShapeInfo(), + rightBuf, rightInput->specialShapeInfo(), lower, adjoint, outputBuf, output->specialShapeInfo(), leftTads.specialShapeInfo(), leftTads.specialOffsets(), rightTads.specialShapeInfo(), rightTads.specialOffsets(), outputTads.specialShapeInfo(), 
outputTads.specialOffsets(), leftTads.numberOfTads()); @@ -168,7 +168,7 @@ namespace sd { template static __global__ void upperAdjointKernel(T const* input, T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, - Nd4jLong* inputTads, Nd4jLong* inputOffsets, Nd4jLong* outputTads, Nd4jLong* outputOffsets) { + Nd4jLong const* inputTads, Nd4jLong const* inputOffsets, Nd4jLong const* outputTads, Nd4jLong const* outputOffsets) { for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { auto inputPart = input + inputOffsets[b]; @@ -189,7 +189,7 @@ namespace sd { template static __global__ void lowerAdjointKernel(T const* input, T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, - Nd4jLong* inputTads, Nd4jLong* inputOffsets, Nd4jLong* outputTads, Nd4jLong* outputOffsets) { + Nd4jLong const* inputTads, Nd4jLong const* inputOffsets, Nd4jLong const* outputTads, Nd4jLong const* outputOffsets) { for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { auto inputPart = input + inputOffsets[b]; @@ -210,10 +210,10 @@ namespace sd { static void adjointTriangularMatrix_(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output) { - auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {-2, -1}); - auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {-2, -1}); + auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1}); + auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); - auto inputBuf = reinterpret_cast(input->getSpecialBuffer()); + auto inputBuf = reinterpret_cast(input->specialBuffer()); auto outputBuf = reinterpret_cast(output->specialBuffer()); auto rows = input->sizeAt(-2); auto columns = input->sizeAt(-1); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu index 33272ff57..c096c4294 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu @@ -101,7 +101,7 @@ linkage void adaDeltaUpdaterCudaLauncher(const int blocksPerGrid, const int thre const T rho = static_cast(dRho); const T epsilon = static_cast(dEpsilon); - adaDeltaUpdaterCuda << > > (vx, xShapeInfo, vinMsg, inMsgShapeInfo, + adaDeltaUpdaterCuda<<>>(vx, xShapeInfo, vinMsg, inMsgShapeInfo, vinMsdx, inMsdxShapeInfo, vz, zShapeInfo, vstMsg, stMsgShapeInfo, vstMsdx, stMsdxShapeInfo, rho, epsilon); } @@ -115,10 +115,10 @@ void updaterAdaDelta(sd::LaunchContext* context, const NDArray& gradient, const const int blocksPerGrid = (gradient.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({ &update, &stateMsg, &stateMsdx }, { &gradient, &initStateMsg, &initStateMsdx }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), adaDeltaUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateMsg.getSpecialBuffer(), initStateMsg.getSpecialShapeInfo(), initStateMsdx.getSpecialBuffer(), initStateMsdx.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(),stateMsg.getSpecialBuffer(), stateMsg.getSpecialShapeInfo(), - stateMsdx.getSpecialBuffer(), stateMsdx.getSpecialShapeInfo(), dRho, dEpsilon), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), adaDeltaUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateMsg.specialBuffer(), initStateMsg.specialShapeInfo(), initStateMsdx.specialBuffer(), initStateMsdx.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(),stateMsg.specialBuffer(), stateMsg.specialShapeInfo(), + stateMsdx.specialBuffer(), stateMsdx.specialShapeInfo(), dRho, 
dEpsilon), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateMsg, &stateMsdx }, { &gradient, &initStateMsg, &initStateMsdx }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu index f0e77826d..50a43986c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu @@ -88,7 +88,7 @@ linkage void adaGradUpdaterCudaLauncher(const int blocksPerGrid, const int threa const T lr = static_cast(dLr); const T epsilon = static_cast(dEpsilon); - adaGradUpdaterCuda << > > (vx, xShapeInfo, vin, inShapeInfo, + adaGradUpdaterCuda<<>>(vx, xShapeInfo, vin, inShapeInfo, vz, zShapeInfo, vst, stShapeInfo, lr, epsilon); } @@ -103,10 +103,10 @@ void updaterAdaGrad(sd::LaunchContext* context, const NDArray& gradient, const N NDArray::prepareSpecialUse({ &update, &stateH }, { &gradient, &initState }); BUILD_SINGLE_SELECTOR(gradient.dataType(), adaGradUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), - gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initState.getSpecialBuffer(), initState.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateH.getSpecialBuffer(), stateH.getSpecialShapeInfo(), dLr, dEpsilon), FLOAT_TYPES); + gradient.specialBuffer(), gradient.specialShapeInfo(), + initState.specialBuffer(), initState.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), + stateH.specialBuffer(), stateH.specialShapeInfo(), dLr, dEpsilon), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateH }, { &gradient, &initState }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu index 514440304..09301d05a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu @@ -111,7 +111,7 @@ linkage void adaMaxUpdaterCudaLauncher(const int blocksPerGrid, const int thread const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - adaMaxUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, + adaMaxUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -127,10 +127,10 @@ void updaterAdaMax(sd::LaunchContext* context, const NDArray& gradient, const ND NDArray::prepareSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); BUILD_SINGLE_SELECTOR(gradient.dataType(), adaMaxUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), - gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), initStateU.getSpecialBuffer(), - initStateU.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), stateU.getSpecialBuffer(), - stateU.getSpecialShapeInfo(), stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), + gradient.specialBuffer(), gradient.specialShapeInfo(), initStateU.specialBuffer(), + initStateU.specialShapeInfo(), initStateM.specialBuffer(), initStateM.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), stateU.specialBuffer(), + stateU.specialShapeInfo(), stateM.specialBuffer(), stateM.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration ), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu index e23f4a5ca..91d79809c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu @@ -108,7 +108,7 @@ linkage void adamUpdaterCudaLauncher(const int blocksPerGrid, const int threadsP const T beta2 = static_cast(dBeta2); const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - adamUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, + adamUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -124,10 +124,10 @@ void updaterAdam(sd::LaunchContext* context, const NDArray& gradient, const NDAr NDArray::prepareSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), adamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateU.getSpecialBuffer(), initStateU.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), stateU.getSpecialBuffer(), stateU.getSpecialShapeInfo(), - stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), adamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateU.specialBuffer(), initStateU.specialShapeInfo(), initStateM.specialBuffer(), initStateM.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), stateU.specialBuffer(), stateU.specialShapeInfo(), + stateM.specialBuffer(), stateM.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu index d24c83f17..ff3bc1e4b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu @@ -122,7 +122,7 @@ linkage void amsGradUpdaterCudaLauncher(const int blocksPerGrid, const int threa const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - amsGradUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, + amsGradUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vinh, inhShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, vstH, sthShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -136,11 +136,11 @@ void updaterAmsGrad(sd::LaunchContext* context, const NDArray& gradient, const N const int blocksPerGrid = (gradient.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({ &update, &stateV, &stateM, &stateH }, { &gradient, &initStateV, &initStateM, &initStateH }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), amsGradUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateV.getSpecialBuffer(), initStateV.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - initStateH.getSpecialBuffer(), initStateH.getSpecialShapeInfo(), update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateV.getSpecialBuffer(), stateV.getSpecialShapeInfo(), stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), - stateH.getSpecialBuffer(), stateH.getSpecialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), amsGradUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateV.specialBuffer(), initStateV.specialShapeInfo(), initStateM.specialBuffer(), 
initStateM.specialShapeInfo(), + initStateH.specialBuffer(), initStateH.specialShapeInfo(), update.specialBuffer(), update.specialShapeInfo(), + stateV.specialBuffer(), stateV.specialShapeInfo(), stateM.specialBuffer(), stateM.specialShapeInfo(), + stateH.specialBuffer(), stateH.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateV, &stateM , &stateH }, { &gradient, &initStateV, &initStateM, &initStateH }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu index 2ac1ec99b..141ed27db 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu @@ -108,7 +108,7 @@ linkage void nadamUpdaterCudaLauncher(const int blocksPerGrid, const int threads const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - nadamUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, + nadamUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -122,10 +122,10 @@ void updaterNadam(sd::LaunchContext* context, const NDArray& gradient, const NDA const int blocksPerGrid = (gradient.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({ &update, &stateV, &stateM }, { &gradient, &initStateV, &initStateM }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), nadamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateV.getSpecialBuffer(), initStateV.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), stateV.getSpecialBuffer(), stateV.getSpecialShapeInfo(), - 
stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), nadamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateV.specialBuffer(), initStateV.specialShapeInfo(), initStateM.specialBuffer(), initStateM.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), stateV.specialBuffer(), stateV.specialShapeInfo(), + stateM.specialBuffer(), stateM.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateV, &stateM }, { &gradient, &initStateV, &initStateM }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu index 73616a5cd..75e1f5938 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu @@ -88,7 +88,7 @@ linkage void nesterovsUpdaterCudaLauncher(const int blocksPerGrid, const int thr const T lr = static_cast(dLr); const T momentum = static_cast(dMomentum); - nesterovsUpdaterCuda << > > (vx, xShapeInfo, vin, inShapeInfo, + nesterovsUpdaterCuda<<>>(vx, xShapeInfo, vin, inShapeInfo, vz, zShapeInfo, vst, stShapeInfo, lr, momentum); } @@ -103,10 +103,10 @@ void updaterNesterovs(sd::LaunchContext* context, const NDArray& gradient, const NDArray::prepareSpecialUse({ &update, &stateV }, { &gradient, &initState }); BUILD_SINGLE_SELECTOR(gradient.dataType(), nesterovsUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, - context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initState.getSpecialBuffer(), initState.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateV.getSpecialBuffer(), stateV.getSpecialShapeInfo(), dLr, 
dMomentum), FLOAT_TYPES); + context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initState.specialBuffer(), initState.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), + stateV.specialBuffer(), stateV.specialShapeInfo(), dLr, dMomentum), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateV }, { &gradient, &initState }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu index de0a5dba1..26f7253d2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu @@ -105,10 +105,10 @@ void updaterRmsProp(sd::LaunchContext* context, const NDArray& gradient, const N NDArray::prepareSpecialUse({&update, &stateG}, {&gradient, &initState }); BUILD_SINGLE_SELECTOR(gradient.dataType(), rmsPropUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, - context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initState.getSpecialBuffer(), initState.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateG.getSpecialBuffer(), stateG.getSpecialShapeInfo(), + context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initState.specialBuffer(), initState.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), + stateG.specialBuffer(), stateG.specialShapeInfo(), dLr, dRmsDecay, dEpsilon ), FLOAT_TYPES); NDArray::registerSpecialUse({&update, &stateG}, {&gradient, &initState}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/weights.cu b/libnd4j/include/ops/declarable/helpers/cuda/weights.cu index b543fa1c2..1620820a5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/weights.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/weights.cu @@ -27,8 +27,8 @@ namespace helpers { template - static __device__ void 
adjustWeightsKernelD(void* inputBuffer, Nd4jLong* inputShape, - void* weightsBuffer, Nd4jLong* weightsShape, + static __device__ void adjustWeightsKernelD(void* inputBuffer, Nd4jLong const* inputShape, + void* weightsBuffer, Nd4jLong const* weightsShape, void* outputBuffer, Nd4jLong inputLength, Nd4jLong outputLength, int val) { // typedef Nd4jLong T; @@ -66,9 +66,9 @@ namespace helpers { } template - static __global__ void adjustWeightsKernel(void* inputBuffer, Nd4jLong* inputShape, - void* weightsBuffer, Nd4jLong* weightsShape, - void* outputBuffer, Nd4jLong* outputShape, + static __global__ void adjustWeightsKernel(void* inputBuffer, Nd4jLong const* inputShape, + void* weightsBuffer, Nd4jLong const* weightsShape, + void* outputBuffer, Nd4jLong const* outputShape, int minLength, int maxLength) { //auto tid = blockIdx.x * blockDim.x + threadIdx.x; // * blockDim.x; // + threadIdx.x; @@ -105,7 +105,7 @@ namespace helpers { dim3 launchDims(256, 512, 8192); auto stream = context->getCudaStream(); adjustWeightsKernel<<>>(input->specialBuffer(), - input->getSpecialShapeInfo(), weights?weights->specialBuffer():nullptr, weights?weights->getSpecialShapeInfo():nullptr, + input->specialShapeInfo(), weights?weights->specialBuffer():nullptr, weights?weights->specialShapeInfo():nullptr, output->specialBuffer(), output->specialShapeInfo(), minLength, maxLength); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu b/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu index 43f0ee8d1..660c49325 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu @@ -69,7 +69,7 @@ void zeta(sd::LaunchContext * context, const NDArray& x, const NDArray& q, NDArr int threadsPerBlock = MAX_NUM_THREADS / 2; int blocksPerGrid = (z.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - BUILD_SINGLE_SELECTOR(x.dataType(), zetaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.getSpecialBuffer(), 
x.getSpecialShapeInfo(), q.getSpecialBuffer(), q.getSpecialShapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x.dataType(), zetaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.specialBuffer(), x.specialShapeInfo(), q.specialBuffer(), q.specialShapeInfo(), z.specialBuffer(), z.specialShapeInfo()), FLOAT_TYPES); x.tickReadHost(); q.tickReadHost(); diff --git a/libnd4j/include/ops/declarable/helpers/dilation2d.h b/libnd4j/include/ops/declarable/helpers/dilation2d.h index a26fe10f1..281a2f26a 100644 --- a/libnd4j/include/ops/declarable/helpers/dilation2d.h +++ b/libnd4j/include/ops/declarable/helpers/dilation2d.h @@ -54,7 +54,7 @@ FORCEINLINE Nd4jStatus outputSize(sd::LaunchContext * context, const int inSize, } ////////////////////////////////////////////////////////////////////// -FORCEINLINE Nd4jStatus dilation_hw(sd::LaunchContext * context, Nd4jLong *in, Nd4jLong *wh, std::vector &strides, std::vector &rates, bool isSameMode, int *sH, int *sW, int *pH, int *pW, int *dH, int *dW, int *oH, int *oW) { +FORCEINLINE Nd4jStatus dilation_hw(sd::LaunchContext * context, Nd4jLong const* in, Nd4jLong const* wh, std::vector &strides, std::vector &rates, bool isSameMode, int *sH, int *sW, int *pH, int *pW, int *dH, int *dW, int *oH, int *oW) { const int iH = shape::sizeAt(in, 1); const int iW = shape::sizeAt(in, 2); const int iC = shape::sizeAt(in, 3); diff --git a/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp b/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp index d3880c730..1a61587a3 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp @@ -53,7 +53,7 @@ namespace sd { void knn_mindistance(const NDArray &input, const NDArray &lowest, const NDArray &highest, NDArray &output) { NDArray::preparePrimaryUse({&output}, {&input, &lowest, &highest}); - 
BUILD_SINGLE_SELECTOR(input.dataType(), mindistance_, (input.getBuffer(), lowest.getBuffer(), highest.getBuffer(), input.lengthOf(), output.buffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), mindistance_, (input.buffer(), lowest.buffer(), highest.buffer(), input.lengthOf(), output.buffer()), FLOAT_TYPES); NDArray::registerPrimaryUse({&output}, {&input, &lowest, &highest}); } diff --git a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp index 36044907e..bbcb1eca3 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp @@ -113,7 +113,7 @@ namespace sd { } // write out values - BUILD_DOUBLE_SELECTOR(values.dataType(), indices.dataType(), fill_, (values.getBuffer(), indices.getBuffer(), output.buffer(), output.getShapeInfo(), rank, values.lengthOf()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(values.dataType(), indices.dataType(), fill_, (values.buffer(), indices.buffer(), output.buffer(), output.shapeInfo(), rank, values.lengthOf()), LIBND4J_TYPES, INDEXING_TYPES); } // copy back to device, if there's any output.syncToDevice(); diff --git a/libnd4j/include/ops/declarable/helpers/impl/where.cpp b/libnd4j/include/ops/declarable/helpers/impl/where.cpp index df8fd1074..b2d758673 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/where.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/where.cpp @@ -33,9 +33,9 @@ namespace sd { for (Nd4jLong e = 0; e < condition.lengthOf(); e++) { - shape::index2coordsCPU(0, e, condition.getShapeInfo(), idx); + shape::index2coordsCPU(0, e, condition.shapeInfo(), idx); - auto offset = shape::getOffset(condition.getShapeInfo(), idx); + auto offset = shape::getOffset(condition.shapeInfo(), idx); if (condition.e(offset)) { auto array = NDArrayFactory::create_('c', {1, condition.rankOf()}, output.dataType(), output.getContext()); diff 
--git a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp index 66eade39f..8f0a6dcb8 100644 --- a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp @@ -42,7 +42,7 @@ namespace sd { return shapeList; } - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else if (shape::isScalar(x) && shape::isScalar(y)) { @@ -58,7 +58,7 @@ namespace sd { } else if (!shape::isScalar(x) && shape::isScalar(y)) { shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype))); } else if (ShapeUtils::areShapesBroadcastable(x, y)) { - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else { diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp index eb691b84d..7f7a14861 100644 --- a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp @@ -56,7 +56,7 @@ namespace sd { } - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else if (shape::isScalar(x) && shape::isScalar(y)) { @@ -72,7 +72,7 @@ namespace sd { } else if (!shape::isScalar(x) && shape::isScalar(y)) { shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype))); } else 
if (ShapeUtils::areShapesBroadcastable(x, y)) { - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else { diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp index 44fbaae42..c839c41c9 100644 --- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp @@ -234,7 +234,7 @@ namespace sd { // we build list of input shapes if (fp) { for (const auto p:ctx.fastpath_in()) { - inSha.push_back(p == nullptr ? nullptr : p->getShapeInfo()); + inSha.push_back(p == nullptr ? nullptr : p->shapeInfo()); } } else { int arrCnt = 0; @@ -245,7 +245,7 @@ namespace sd { if (array == nullptr) throw unresolved_input_exception::build("Variable wasn't resolved prior shape calculation", p); - inSha.push_back(array->getShapeInfo()); + inSha.push_back(array->shapeInfo()); // we're also filling ctx with arrays if (canUseFastPath) @@ -1095,7 +1095,7 @@ namespace sd { NDArray *a0 = block.array(0); for (int e = 0; e < block.width(); e++) { auto aV = block.array(e); - if (!shape::equalsSoft(a0->getShapeInfo(), aV->getShapeInfo())) + if (!shape::equalsSoft(a0->shapeInfo(), aV->shapeInfo())) return ND4J_STATUS_BAD_DIMENSIONS; } diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp index 03f34d269..f7cb3de92 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp @@ -41,7 +41,7 @@ namespace sd { int opNum = block.opNum() < 0 ? 
this->_opNum : block.opNum(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); PointersManager manager(block.launchContext(), "LegacyBroadcastBoolOp"); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); @@ -57,7 +57,7 @@ namespace sd { else { // this is rare, but possible use case - X and Z might have different shapes/strides/orders. In this case we prepare and pass separate TAD info - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo)); auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong)); diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp index 0297df28a..82899bbdb 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp @@ -47,7 +47,7 @@ namespace sd { int opNum = block.opNum() < 0 ? 
this->_opNum : block.opNum(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto tadLen = shape::length(packX.primaryShapeInfo()); REQUIRE_TRUE(tadLen == y->lengthOf(), 0, "Length of broadcast TAD should be equal to length of Y operand, but got [%i] vs [%i]",tadLen, (int) y->lengthOf()); @@ -62,7 +62,7 @@ namespace sd { z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), dims.size(), pTadShape, pTadOffsets, pTadShape, pTadOffsets); else { // this is rare, but possible use case - X and Z might have different shapes/strides/orders. In this case we prepare and pass separate TAD info - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo)); auto zTadOffsets = Environment::getInstance()->isCPU() ? 
packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong)); diff --git a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp index c92577f3b..7fc6bf793 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp @@ -42,8 +42,8 @@ namespace sd { ShapeList *LegacyIndexReduceOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - Nd4jLong *newShape; if (block.getAxis()->size() == 0 && block.width() == 1) { + Nd4jLong *newShape; // in this case we just return scalar ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); newShape[0] = 2; @@ -61,7 +61,7 @@ namespace sd { // in this case we're building proper shape for reduction auto array = INPUT_VARIABLE(0); //new NDArray(nullptr, inShape, block.getWorkspace()); - newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getAxis(), *array, DataType::INT64, false, true, block.workspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getAxis(), *array, DataType::INT64, false, true, block.workspace()); return SHAPELIST(newShape); } else { @@ -78,6 +78,7 @@ namespace sd { axis[e] = f >= 0 ? 
f : f += rank; } if (allAxes){ + Nd4jLong *newShape; // in this case we just return scalar ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); newShape[0] = 2; @@ -94,8 +95,7 @@ namespace sd { } else { // in this case we're building proper shape for reduction auto array = INPUT_VARIABLE(0); //new NDArray(nullptr, inShape, block.getWorkspace()); - newShape = ShapeUtils::evalReduceShapeInfo('c', axis, *array, DataType::INT64, false, true, block.workspace()); - return SHAPELIST(newShape); + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', axis, *array, DataType::INT64, false, true, block.workspace())); } } } @@ -124,11 +124,11 @@ namespace sd { if (block.width() == 1) { if (block.getAxis()->size() == 0) { // scalar - NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), - x->getSpecialBuffer(), x->getSpecialShapeInfo(), + NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), + x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), - z->getSpecialBuffer(), z->getSpecialShapeInfo()); + z->buffer(), z->shapeInfo(), + z->specialBuffer(), z->specialShapeInfo()); } else { // TAD std::vector dims(block.getAxis()->size()); @@ -141,11 +141,11 @@ namespace sd { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); - NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), - x->getSpecialBuffer(), x->getSpecialShapeInfo(), + NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), + x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - reinterpret_cast(z->getBuffer()), z->getShapeInfo(), - z->getSpecialBuffer(), z->getSpecialShapeInfo(), + reinterpret_cast(z->buffer()), z->shapeInfo(), + z->specialBuffer(), z->specialShapeInfo(), nullptr, 
(int) dims.size(), Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance()->isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets()); } @@ -163,11 +163,11 @@ namespace sd { } if (allAxes) { - NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), - x->getSpecialBuffer(), x->getSpecialShapeInfo(), + NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), + x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->getSpecialBuffer(), - z->getSpecialShapeInfo()); + z->buffer(), z->shapeInfo(), z->specialBuffer(), + z->specialShapeInfo()); } else { if (indices->lengthOf() > 1) @@ -178,10 +178,10 @@ namespace sd { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), axis); NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, - x->getBuffer(), x->getShapeInfo(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), + x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - reinterpret_cast(z->getBuffer()), - z->getShapeInfo(), z->getSpecialBuffer(), z->getSpecialShapeInfo(), + reinterpret_cast(z->buffer()), + z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), nullptr, (int) axis.size(), Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance()->isCPU() ? 
tadPack.primaryOffsets() : tadPack.specialOffsets()); diff --git a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp index eb75141a9..11a05a76c 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp @@ -51,9 +51,9 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyPairwiseTransformBoolOp"); - NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), - y->getBuffer(), y->getShapeInfo(), y->getSpecialBuffer(), y->getSpecialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->getSpecialBuffer(), z->getSpecialShapeInfo(), + NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(x->dataType())); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp index 7f6eecb19..877d2d73d 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp @@ -51,9 +51,9 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyPairwiseTransformOp"); - NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - y->getBuffer(), y->getShapeInfo(), y->specialBuffer(), y->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), 
z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType())); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp index eeb80f403..085780c56 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp @@ -344,8 +344,7 @@ namespace sd { auto zShapeVector = zShapeArr->asVectorT(); auto dtype = block.dataType(); - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', zShapeVector); - return SHAPELIST(newShape); + return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', zShapeVector)); } else throw std::runtime_error("LegacyRandomOp: Unknown input data type!"); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp index 7143c3bbd..f110c0c55 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp @@ -52,8 +52,8 @@ namespace sd { if (dims[e] < 0) dims[e] += x->rankOf(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions requuired for reduction!"); diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp 
b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp index 433e173fc..4aced5aec 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp @@ -63,7 +63,7 @@ namespace sd { if ((axis.empty()) || (axis.size() == 1 && axis[0] == sd::DataTypeUtils::max()) || allAxes) { // scalar - NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -75,15 +75,15 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), - dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); } STORE_RESULT(*z); @@ -103,7 +103,7 @@ namespace sd { if ((block.getIArguments()->size() == 1 && INT_ARG(0) == sd::DataTypeUtils::max()) || allAxes) { // scalar - NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD if (indices->lengthOf() > 1) @@ -111,13 +111,13 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? 
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); + NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp index 23f863ba2..55197844a 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp @@ -64,7 +64,7 @@ namespace sd { // (block.getIArguments()->size() == 1 && INT_ARG(0) == sd::DataTypeUtils::max()) if (block.getAxis()->empty() || allAxes) { // scalar - NativeOpExecutioner::execReduceFloatScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceFloatScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -76,14 +76,14 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - 
auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), - dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); + NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); } @@ -109,13 +109,13 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); @@ -133,8 +133,6 @@ namespace sd { ShapeList *LegacyReduceFloatOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - Nd4jLong *newShape; - bool allAxes = false; auto keepDims = block.numB() > 0 ? B_ARG(0) : false; @@ -146,7 +144,7 @@ namespace sd { allAxes = true; // in this case we're building proper shape for reduction - newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp index 17cba4227..628c4cb5f 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp @@ -63,7 +63,7 @@ namespace sd { if ((axis.empty()) || (axis.size() == 1 && axis[0] == sd::DataTypeUtils::max()) || allAxes) { // scalar - NativeOpExecutioner::execReduceLongScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + 
NativeOpExecutioner::execReduceLongScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -78,14 +78,14 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } @@ -111,13 +111,13 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? 
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); + NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp index 46be149c6..e6c3dd63b 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp @@ -54,7 +54,7 @@ namespace sd { if ((block.getIArguments()->size() == 0) || (block.getIArguments()->size() == 1 && INT_ARG(0) == MAX_INT) || allAxes) { // scalar - NativeOpExcutioner::execReduceFloatScalar(opNum, x->getBuffer(), x->getShapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo()); + NativeOpExcutioner::execReduceFloatScalar(opNum, x->buffer(), x->shapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo()); } else { // TAD std::vector dims(*block.getIArguments()); @@ -67,11 +67,11 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - shape::TAD tad(x->getShapeInfo(), dims.data(), dims.size()); + shape::TAD tad(x->shapeInfo(), 
dims.data(), dims.size()); tad.createTadOnlyShapeInfo(); tad.createOffsets(); - NativeOpExcutioner::execReduceFloat(opNum, x->getBuffer(), x->getShapeInfo(), block.getTArguments()->data(), z->getBuffer(), z->getShapeInfo(), dims.data(), (int) dims.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); + NativeOpExcutioner::execReduceFloat(opNum, x->buffer(), x->shapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo(), dims.data(), (int) dims.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); } STORE_RESULT(*z); @@ -92,7 +92,7 @@ namespace sd { if ((block.getIArguments()->size() == 1 && INT_ARG(0) == MAX_INT) || allAxes) { auto z = OUTPUT_VARIABLE(0); - auto b = x->getBuffer(); + auto b = x->buffer(); auto s = x->shapeInfo(); auto e = block.numT() > 0 ? block.getTArguments()->data() : nullptr; @@ -107,14 +107,14 @@ namespace sd { REQUIRE_TRUE(axis.size() > 0, 0, "Some dimensions required for reduction!"); - shape::TAD tad(x->getShapeInfo(), axis.data(), axis.size()); + shape::TAD tad(x->shapeInfo(), axis.data(), axis.size()); tad.createTadOnlyShapeInfo(); tad.createOffsets(); auto newShape = ShapeUtils::evalReduceShapeInfo(x->ordering(), axis, *x); auto z = new NDArray(newShape, x->getWorkspace()); - NativeOpExcutioner::execReduceFloat(opNum, x->getBuffer(), x->getShapeInfo(), block.getTArguments()->data(), z->getBuffer(), z->getShapeInfo(), axis.data(), (int) axis.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); + NativeOpExcutioner::execReduceFloat(opNum, x->buffer(), x->shapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo(), axis.data(), (int) axis.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); // keepDims processing, for TF compatibility diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp index 3c96bca70..e406a3a2d 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp @@ -61,7 +61,7 
@@ namespace sd { if (axis.empty() || allAxes) { // scalar - NativeOpExecutioner::execReduceSameScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceSameScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -73,14 +73,14 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), - z->getBuffer(), z->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + z->buffer(), z->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } @@ -106,13 +106,13 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } } @@ -129,8 +129,6 @@ namespace sd { ShapeList *LegacyReduceSameOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - Nd4jLong *newShape; - bool allAxes = false; auto keepDims = block.numB() > 0 ? B_ARG(0) : false; @@ -142,7 +140,7 @@ namespace sd { allAxes = true; // in this case we're building proper shape for reduction - newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp index 46728ede1..abfd84efb 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp @@ -64,19 +64,19 @@ namespace sd { NDArray::prepareSpecialUse({z}, {x, y}); - NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), 
y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(x->dataType())); + NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(x->dataType())); } else if (block.getTArguments()->size() > 0) { auto y = NDArrayFactory::create(T_ARG(0), block.launchContext()); NDArray::prepareSpecialUse({z}, {x, &y}); - NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->getBuffer(), z->getShapeInfo(),z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(x->dataType(), 1)); + NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->buffer(), z->shapeInfo(),z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(x->dataType(), 1)); manager.synchronize(); } else { NDArray::prepareSpecialUse({z}, {x, _scalar}); - NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->getBuffer(), z->getShapeInfo(),z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(x->dataType())); + NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->buffer(), z->shapeInfo(),z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(x->dataType())); } 
manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp index de104a11d..3e73b10f5 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp @@ -64,7 +64,7 @@ namespace sd { NDArray::prepareSpecialUse({z}, {x, y}); - NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType())); + NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType())); NDArray::registerSpecialUse({z}, {x, y}); } else if (block.getTArguments()->size() > 0) { @@ -72,13 +72,13 @@ namespace sd { x->applyScalarArr(static_cast(opNum), y, *z); // NDArray::prepareSpecialUse({z}, {x, &y}); - // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(z->dataType(), 1)); + // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(z->dataType(), 1)); manager.synchronize(); } else { NDArray::prepareSpecialUse({z}, {x, _scalar}); - 
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType())); + NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType())); NDArray::registerSpecialUse({z}, {x, _scalar}); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp index 74f82d162..b8694f9ff 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp @@ -46,8 +46,8 @@ namespace sd { if (block.getIArguments()->size() == 1 || (block.getIArguments()->size() == 2 && INT_ARG(1) == sd::DataTypeUtils::max())) { // scalar - NativeOpExecutioner::execSummaryStatsScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), biasCorrected); + NativeOpExecutioner::execSummaryStatsScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), biasCorrected); } else { // dimensions for TAD // we should skip first argument here, because it's addressing bias correction @@ -58,13 +58,13 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions requuired for reduction!"); - auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execSummaryStats(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets, biasCorrected); + NativeOpExecutioner::execSummaryStats(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets, biasCorrected); } manager.synchronize(); @@ -108,12 +108,13 @@ namespace sd { // in this case we're building proper shape for reduction auto array = new NDArray(nullptr, inShape, block.launchContext()); - newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getIArguments(), *array, false, true); + auto newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getIArguments(), *array, false, true); delete array; + return SHAPELIST(newShape); } - return SHAPELIST(newShape); + return SHAPELIST(CONSTANT(newShape)); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp index def577eb3..dde8ce9e9 100644 --- 
a/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(),"LegacyTransformAnyOp"); - NativeOpExecutioner::execTransformAny(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformAny(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp index 99b856b8a..a0651d1fc 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(),"LegacyTransformBoolOp"); - NativeOpExecutioner::execTransformBool(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execTransformBool(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(input->dataType()), nullptr, nullptr); manager.synchronize(); diff --git 
a/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp index f0795b7bb..f25ba00fe 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyTransformFloatOp"); - NativeOpExecutioner::execTransformFloat(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformFloat(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp index b073d9df1..d0a8f7604 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp @@ -43,7 +43,7 @@ namespace sd { int opNum = block.opNum() < 0 ? 
this->_opNum : block.opNum(); - NativeOpExcutioner::execTransformSame(opNum, input->getBuffer(), input->getShapeInfo(), z->getBuffer(), z->getShapeInfo(), block.getTArguments()->data(), nullptr, nullptr); + NativeOpExcutioner::execTransformSame(opNum, input->buffer(), input->shapeInfo(), z->buffer(), z->shapeInfo(), block.getTArguments()->data(), nullptr, nullptr); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp index 0d827787e..02a69da6b 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyTransformSameOp"); - NativeOpExecutioner::execTransformSame(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformSame(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp index f36853579..2093e3aab 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp @@ -48,7 +48,7 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyTransformStrictOp"); - 
NativeOpExecutioner::execTransformStrict(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformStrict(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu index 5cf93f10f..eb213f4c2 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu @@ -129,7 +129,7 @@ PLATFORM_CHECK(avgpool2d_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu b/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu index 0d01dfef3..da2fdbc09 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu @@ -135,7 +135,7 @@ PLATFORM_CHECK(avgpool3dnew_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu 
b/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu index 8d0b1301a..7568ba47a 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu @@ -108,11 +108,11 @@ static void batchnormCUDNN(const LaunchContext* context, // calculations err = cudnnBatchNormalizationForwardInference(*handle, isSpatialMode ? CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION, ptrAlpha, ptrBeta, - x, input->getSpecialBuffer(), - z, output->getSpecialBuffer(), + x, input->specialBuffer(), + z, output->specialBuffer(), params, - gamma->getSpecialBuffer(), beta->getSpecialBuffer(), - mean->getSpecialBuffer(), variance->getSpecialBuffer(), epsilon); + gamma->specialBuffer(), beta->specialBuffer(), + mean->specialBuffer(), variance->specialBuffer(), epsilon); if (err != 0) throw sd::cuda_exception::build("batchnormCUDNN: cudnnBatchNormalizationForwardInference failed", err); @@ -215,13 +215,13 @@ static void batchnormBpCUDNN(const LaunchContext* context, // TODO: we can use cache here err = cudnnBatchNormalizationBackward(*handle, isSpatialMode ? 
CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION, ptrAlpha, ptrBeta, ptrAlpha, ptrBeta, - x, input->getSpecialBuffer(), - dz, gradO->getSpecialBuffer(), - dx, gradI->getSpecialBuffer(), + x, input->specialBuffer(), + dz, gradO->specialBuffer(), + dx, gradI->specialBuffer(), params, - gamma->getSpecialBuffer(), gradG->getSpecialBuffer(), gradB->getSpecialBuffer(), + gamma->specialBuffer(), gradG->specialBuffer(), gradB->specialBuffer(), epsilon, - nullptr/*mean->getSpecialBuffer()*/, nullptr/*variance->getSpecialBuffer()*/); + nullptr/*mean->specialBuffer()*/, nullptr/*variance->specialBuffer()*/); if (err != 0) throw sd::cuda_exception::build("batchnormBpCUDNN: cudnnBatchNormalizationBackward failed", err); @@ -362,11 +362,11 @@ PLATFORM_CHECK(batchnorm, ENGINE_CUDA) { return false; // *********************************** // - bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(gamma) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(beta) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), beta->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), beta->shapeInfo()); if(!allParamsHaveSameShapeAndStrides) return false; @@ -536,13 +536,13 @@ PLATFORM_CHECK(batchnorm_bp, ENGINE_CUDA) { return false; // *********************************** // - bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(gamma) - 
allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(gradG) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gradG->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gradG->shapeInfo()); if(gradB) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gradB->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gradB->shapeInfo()); if(!allParamsHaveSameShapeAndStrides) return false; diff --git a/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu index 43dc7ce07..a77faf6f7 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu @@ -102,7 +102,7 @@ static void conv2dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input, weights, bias}); // run calculation - err = cudnnConvolutionForward(*handle, alpha, x, input->getSpecialBuffer(), w, weights->getSpecialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); + err = cudnnConvolutionForward(*handle, alpha, x, input->specialBuffer(), w, weights->specialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dCUDNN: cudnnConvolutionForward failed", err); // add bias if it is present @@ -112,7 +112,7 @@ static void conv2dCUDNN(const LaunchContext* context, // err = cudnnSetTensor4dDescriptor(b, format, cudnnDataType(bias->dataType()), 1, isNCHW ? bias->lengthOf() : 1, 1, isNCHW ? 
1: bias->lengthOf()); err = cudnnSetTensor4dDescriptor(b, CUDNN_TENSOR_NCHW, cudnnDataType(bias->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("conv2dCUDNN: cudnnSetTensor4dDescriptor for bias failed", err); - err = cudnnAddTensor(*handle, alpha, b, bias->getSpecialBuffer(), alpha, z, output->specialBuffer()); + err = cudnnAddTensor(*handle, alpha, b, bias->specialBuffer(), alpha, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dCUDNN: cudnnAddTensor bias failed", err); } @@ -228,16 +228,16 @@ static void conv2dBpCUDNN(const LaunchContext* context, err = cudnnSetTensor4dDescriptor(db, CUDNN_TENSOR_NCHW, cudnnDataType(gradB->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnSetTensor4dDescriptor for gradB failed", err); - err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->getSpecialBuffer(), beta, db, gradB->getSpecialBuffer()); + err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->specialBuffer(), beta, db, gradB->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnConvolutionBackwardBias failed", err); } // run calculation for gradW - err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->getSpecialBuffer()); + err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnConvolutionBackwardFilter failed", err); // run calculation for gradI - err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->getSpecialBuffer()); + err = cudnnConvolutionBackwardData(*handle, alpha, dw, 
weights->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnConvolutionBackwardData failed", err); // cudaErr = cudaStreamSynchronize(*context->getCudaStream()); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu b/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu index 9d226d6f7..693ebeefa 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu @@ -114,7 +114,7 @@ static void conv3dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input, weights, bias}); // run calculation - err = cudnnConvolutionForward(*handle, alpha, x, input->getSpecialBuffer(), w, weights->getSpecialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); + err = cudnnConvolutionForward(*handle, alpha, x, input->specialBuffer(), w, weights->specialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dCUDNN: cudnnConvolutionForward failed", err); // add bias if it is present @@ -124,7 +124,7 @@ static void conv3dCUDNN(const LaunchContext* context, cudnnCreateTensorDescriptor(&b); err = cudnnSetTensorNdDescriptorEx(b, /*format*/CUDNN_TENSOR_NCHW, cudnnDataType(bias->dataType()), numDims, bShape.data()); if (err != 0) throw sd::cuda_exception::build("conv3dCUDNN: cudnnSetTensorNdDescriptor for bias failed", err); - err = cudnnAddTensor(*handle, alpha, b, bias->getSpecialBuffer(), alpha, z, output->specialBuffer()); + err = cudnnAddTensor(*handle, alpha, b, bias->specialBuffer(), alpha, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dCUDNN: cudnnAddTensor bias failed", err); } @@ -257,16 +257,16 @@ static void conv3dBpCUDNN(const LaunchContext* context, err = cudnnSetTensorNdDescriptorEx(db, format, 
cudnnDataType(gradB->dataType()), numDims, dbShape.data()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnSetTensorNdDescriptor for gradB failed", err); - err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->getSpecialBuffer(), beta, db, gradB->getSpecialBuffer()); + err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->specialBuffer(), beta, db, gradB->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnConvolutionBackwardBias failed", err); } // run calculation for gradW - err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->getSpecialBuffer()); + err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnConvolutionBackwardFilter failed", err); // run calculation for gradI - err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->getSpecialBuffer()); + err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnConvolutionBackwardData failed", err); // cudaErr = cudaStreamSynchronize(*context->getCudaStream()); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu b/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu index 28e845b00..54f8a1f3b 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu @@ -165,7 +165,7 @@ void pooling2dCUDNN(const LaunchContext* context, 
NDArray::prepareSpecialUse({output}, {input}); // run calculation - err = cudnnPoolingForward(*handle, pooling, alpha, x, input->getSpecialBuffer(), beta, z, output->specialBuffer()); + err = cudnnPoolingForward(*handle, pooling, alpha, x, input->specialBuffer(), beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dCUDNN: cudnnPoolingForward failed", err); auto cudaErr = cudaStreamSynchronize(*context->getCudaStream()); @@ -228,7 +228,7 @@ void pooling2dBpCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({gradI}, {input, gradO}); // run calculation for gradI - err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), x, input->getSpecialBuffer(), beta, x, gradI->getSpecialBuffer()); + err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->specialBuffer(), dz, gradO->specialBuffer(), x, input->specialBuffer(), beta, x, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dBpCUDNN: cudnnPoolingBackward failed", err); auto cudaErr = cudaStreamSynchronize(*context->getCudaStream()); @@ -302,7 +302,7 @@ void pooling3dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input}); // run calculation - err = cudnnPoolingForward(*handle, pooling, alpha, x, input->getSpecialBuffer(), beta, z, output->specialBuffer()); + err = cudnnPoolingForward(*handle, pooling, alpha, x, input->specialBuffer(), beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling3dCUDNN: cudnnPoolingForward failed", err); auto cudaErr = cudaStreamSynchronize(*context->getCudaStream()); @@ -382,11 +382,11 @@ void pooling3dBpCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({gradI}, {input, gradO, &temp}); // run ff calculation - err = cudnnPoolingForward(*handle, pooling, alpha, x, input->getSpecialBuffer(), beta, dz, temp.specialBuffer()); + err = cudnnPoolingForward(*handle, pooling, alpha, x, 
input->specialBuffer(), beta, dz, temp.specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling3dCUDNN: cudnnPoolingForward failed", err); // run bp calculation for gradI - err = cudnnPoolingBackward(*handle, pooling, alpha, dz, temp.getSpecialBuffer(), dz, gradO->getSpecialBuffer(), x, input->getSpecialBuffer(), beta, x, gradI->getSpecialBuffer()); + err = cudnnPoolingBackward(*handle, pooling, alpha, dz, temp.specialBuffer(), dz, gradO->specialBuffer(), x, input->specialBuffer(), beta, x, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dBpCUDNN: cudnnPoolingBackward failed", err); NDArray::registerSpecialUse({gradI}, {input, gradO, &temp}); @@ -396,7 +396,7 @@ void pooling3dBpCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({gradI}, {input, gradO}); // run bp calculation for gradI - err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), x, input->getSpecialBuffer(), beta, x, gradI->getSpecialBuffer()); + err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->specialBuffer(), dz, gradO->specialBuffer(), x, input->specialBuffer(), beta, x, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dBpCUDNN: cudnnPoolingBackward failed", err); NDArray::registerSpecialUse({gradI}, {input, gradO}); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu index 612206f35..c268961ce 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu @@ -109,7 +109,7 @@ static void depthwiseConv2dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input, weights, bias}); // run calculation - err = cudnnConvolutionForward(*handle, alpha, x, input->getSpecialBuffer(), w, weights->getSpecialBuffer(), conv, algo, wsData, wsSize, beta, z, 
output->specialBuffer()); + err = cudnnConvolutionForward(*handle, alpha, x, input->specialBuffer(), w, weights->specialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dCUDNN: cudnnConvolutionForward failed", err); // add bias if it is present @@ -120,7 +120,7 @@ static void depthwiseConv2dCUDNN(const LaunchContext* context, // err = cudnnSetTensor4dDescriptor(b, format, cudnnDataType(bias->dataType()), 1, isNCHW ? bias->lengthOf() : 1, 1, isNCHW ? 1: bias->lengthOf()); err = cudnnSetTensor4dDescriptor(b, CUDNN_TENSOR_NCHW, cudnnDataType(bias->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dCUDNN: cudnnSetTensor4dDescriptor for bias failed", err); - err = cudnnAddTensor(*handle, alpha, b, bias->getSpecialBuffer(), alpha, z, output->specialBuffer()); + err = cudnnAddTensor(*handle, alpha, b, bias->specialBuffer(), alpha, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dCUDNN: cudnnAddTensor bias failed", err); } @@ -246,16 +246,16 @@ static void depthwiseConv2dBpCUDNN(const LaunchContext* context, err = cudnnSetTensor4dDescriptor(db, CUDNN_TENSOR_NCHW, cudnnDataType(gradB->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnSetTensor4dDescriptor for gradB failed", err); - err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->getSpecialBuffer(), beta, db, gradB->getSpecialBuffer()); + err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->specialBuffer(), beta, db, gradB->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnConvolutionBackwardBias failed", err); } // run calculation for gradW - err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->getSpecialBuffer()); + err = 
cudnnConvolutionBackwardFilter(*handle, alpha, x, input->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnConvolutionBackwardFilter failed", err); // run calculation for gradI - err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->getSpecialBuffer()); + err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnConvolutionBackwardData failed", err); // cudaErr = cudaStreamSynchronize(*context->getCudaStream()); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu index 3919d9614..5bb646f57 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu @@ -123,7 +123,7 @@ PLATFORM_CHECK(maxpool2d_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu b/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu index d28541b08..f7b9c8b50 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu @@ -131,7 +131,7 @@ PLATFORM_CHECK(maxpool3dnew_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && 
shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp index 6ae27b42a..21bdbbe8d 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp @@ -115,7 +115,7 @@ static void batchnormMKLDNN(const NDArray* x, const NDArray* mean, const NDArray mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_ff_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_ff_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_ff_prim_desc.dst_desc(), engine) : z_user_mem; if (zReorder) @@ -123,17 +123,17 @@ static void batchnormMKLDNN(const NDArray* x, const NDArray* mean, const NDArray args[DNNL_ARG_DST] = z_mkl_mem; // mean - auto mean_mkl_mem = dnnl::memory(op_ff_prim_desc.mean_desc(), engine, mean->getBuffer()); + auto mean_mkl_mem = dnnl::memory(op_ff_prim_desc.mean_desc(), engine, const_cast(mean->buffer())); args[DNNL_ARG_MEAN] = mean_mkl_mem; // variance - auto var_mkl_mem = dnnl::memory(op_ff_prim_desc.variance_desc(), engine, variance->getBuffer()); + auto var_mkl_mem = dnnl::memory(op_ff_prim_desc.variance_desc(), engine, const_cast(variance->buffer())); args[DNNL_ARG_VARIANCE] = var_mkl_mem; // gamma and beta (and their gradients) if they are present if(weights != nullptr) { - auto w_mkl_mem = dnnl::memory(op_ff_prim_desc.weights_desc(), engine, weights->getBuffer()); + auto w_mkl_mem = dnnl::memory(op_ff_prim_desc.weights_desc(), engine, const_cast(weights->buffer())); args[DNNL_ARG_WEIGHTS] = w_mkl_mem; } @@ -245,15 +245,15 @@ static void 
batchnormBackPropMKLDNN(const NDArray* x, const NDArray* mean, const mkldnnUtils::loadDataToMklStream(&dLdO, engine, stream, dLdO_user_md, op_bp_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // mean - auto mean_mkl_mem = dnnl::memory(op_bp_prim_desc.mean_desc(), engine, mean->getBuffer()); + auto mean_mkl_mem = dnnl::memory(op_bp_prim_desc.mean_desc(), engine, const_cast(mean->buffer())); args[DNNL_ARG_MEAN] = mean_mkl_mem; // variance - auto var_mkl_mem = dnnl::memory(op_bp_prim_desc.variance_desc(), engine, variance->getBuffer()); + auto var_mkl_mem = dnnl::memory(op_bp_prim_desc.variance_desc(), engine, const_cast(variance->buffer())); args[DNNL_ARG_VARIANCE] = var_mkl_mem; // dLdI - auto dLdI_user_mem = dnnl::memory(dLdI_user_md, engine, dLdI->getBuffer()); + auto dLdI_user_mem = dnnl::memory(dLdI_user_md, engine, dLdI->buffer()); const bool dLdIReorder = op_bp_prim_desc.diff_src_desc() != dLdI_user_mem.get_desc(); auto dLdI_mkl_mem = dLdIReorder ? dnnl::memory(op_bp_prim_desc.diff_src_desc(), engine) : dLdI_user_mem; args[DNNL_ARG_DIFF_SRC] = dLdI_mkl_mem; @@ -261,10 +261,10 @@ static void batchnormBackPropMKLDNN(const NDArray* x, const NDArray* mean, const // gamma and beta (and their gradients) if they are present if(weights != nullptr) { - auto w_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, weights->getBuffer()); + auto w_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, const_cast(weights->buffer())); args[DNNL_ARG_WEIGHTS] = w_mkl_mem; - auto dLdW_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, dLdW->getBuffer()); + auto dLdW_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, dLdW->buffer()); args[DNNL_ARG_DIFF_WEIGHTS] = dLdW_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp index 0aa05f7f2..b1def8ed7 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp +++ 
b/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp @@ -121,12 +121,12 @@ static void conv2dMKLDNN(const NDArray *input, const NDArray *weights, // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -262,7 +262,7 @@ static void conv2dBpMKLDNN(const NDArray *input, const NDArray *weights, const N mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -274,20 +274,20 @@ static void conv2dBpMKLDNN(const NDArray *input, const NDArray *weights, const N args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? 
dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp index 68f0eea89..b9fa696c5 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp @@ -125,12 +125,12 @@ static void conv3dMKLDNN(const NDArray *input, const NDArray *weights, // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? 
dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -273,7 +273,7 @@ static void conv3dBpMKLDNN(const NDArray *input, const NDArray *weights, const N mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -285,20 +285,20 @@ static void conv3dBpMKLDNN(const NDArray *input, const NDArray *weights, const N args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? 
dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } @@ -379,7 +379,7 @@ static void conv3dMKLDNN(sd::graph::Context &block, } if (bias != nullptr) { - auto conv_bias_memory = dnnl::memory(conv_prim_desc.bias_desc(), engine, bias->getBuffer()); + auto conv_bias_memory = dnnl::memory(conv_prim_desc.bias_desc(), engine, bias->buffer()); convolution_forward(conv_prim_desc).execute(stream, {{DNNL_ARG_SRC, conv_src_memory}, {DNNL_ARG_WEIGHTS, conv_weights_memory}, {DNNL_ARG_BIAS, conv_bias_memory}, diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp index a1ca2a717..584fd50a5 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp @@ -142,12 +142,12 @@ static void deconv2dMKLDNN(const NDArray* input, const NDArray* weights, const N // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? 
dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -279,7 +279,7 @@ static void deconv2dBpMKLDNN(const NDArray* input, const NDArray* weights, const mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -291,20 +291,20 @@ static void deconv2dBpMKLDNN(const NDArray* input, const NDArray* weights, const args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? 
dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp index 3236990b1..5e5da4748 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp @@ -107,7 +107,7 @@ static void deconv2TFdBackPropMKLDNN(const NDArray* weights, const NDArray* grad mkldnnUtils::loadDataToMklStream(gradO, engine, stream, gradO_user_md, op_data_bp_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? 
dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp index bcc3d700a..eb6966c77 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp @@ -144,12 +144,12 @@ static void deconv3dMKLDNN(const NDArray* input, const NDArray* weights, const N // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -287,7 +287,7 @@ static void deconv3dBackPropMKLDNN(const NDArray* input, const NDArray* weights, mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? 
dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -299,20 +299,20 @@ static void deconv3dBackPropMKLDNN(const NDArray* input, const NDArray* weights, args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp index 2ca16bb8e..92f40537b 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp @@ -153,12 +153,12 @@ static void depthwiseConv2dMKLDNN(const NDArray* input, const NDArray* weights, // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = 
dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -300,7 +300,7 @@ static void depthwiseConv2dNackPropMKLDNN(const NDArray* input, const NDArray* w mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -312,20 +312,20 @@ static void depthwiseConv2dNackPropMKLDNN(const NDArray* input, const NDArray* w args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? 
dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp index 6763d1403..60c61ea5f 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp @@ -281,7 +281,7 @@ static void lstmLayerMKLDNN(const NDArray* x, const NDArray* Wx, const NDArray* mkldnnUtils::loadDataToMklStream(Wr, engine, stream, wr_user_md, lstm_prim_desc.weights_iter_desc(), args[DNNL_ARG_WEIGHTS_ITER]); // h - auto h_user_mem = dnnl::memory(h_user_md, engine, h->getBuffer()); + auto h_user_mem = dnnl::memory(h_user_md, engine, h->buffer()); const bool hReorder = lstm_prim_desc.dst_layer_desc() != h_user_mem.get_desc(); auto h_lstm_mem = hReorder ? dnnl::memory(lstm_prim_desc.dst_layer_desc(), engine) : h_user_mem; args[DNNL_ARG_DST_LAYER] = h_lstm_mem; @@ -306,7 +306,7 @@ static void lstmLayerMKLDNN(const NDArray* x, const NDArray* Wx, const NDArray* // hL if(hL) { - hL_user_mem = dnnl::memory(hL_user_md, engine, hL->getBuffer()); + hL_user_mem = dnnl::memory(hL_user_md, engine, hL->buffer()); hLReorder = lstm_prim_desc.dst_iter_desc() != hL_user_mem.get_desc(); hL_lstm_mem = hLReorder ? 
dnnl::memory(lstm_prim_desc.dst_iter_desc(), engine) : hL_user_mem; args[DNNL_ARG_DST_ITER] = hL_lstm_mem; @@ -314,7 +314,7 @@ static void lstmLayerMKLDNN(const NDArray* x, const NDArray* Wx, const NDArray* // cL if(cL) { - cL_user_mem = dnnl::memory(cL_user_md, engine, cL->getBuffer()); + cL_user_mem = dnnl::memory(cL_user_md, engine, cL->buffer()); cLReorder = lstm_prim_desc.dst_iter_c_desc() != cL_user_mem.get_desc(); cL_lstm_mem = cLReorder ? dnnl::memory(lstm_prim_desc.dst_iter_c_desc(), engine) : cL_user_mem; args[DNNL_ARG_DST_ITER_C] = cL_lstm_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp index 0dd3b21f7..265fb74bc 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp @@ -183,7 +183,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b // input mkldnnUtils::loadDataToMklStream(xTR, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); /* - auto x_user_mem = dnnl::memory(x_user_md, engine, xTR->getBuffer()); + auto x_user_mem = dnnl::memory(x_user_md, engine, xTR->buffer()); const bool xReorder = op_prim_desc.src_desc() != x_user_mem.get_desc(); auto x_mkl_mem = xReorder ? dnnl::memory(op_prim_desc.src_desc(), engine) : x_user_mem; if (xReorder) @@ -193,7 +193,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b // y mkldnnUtils::loadDataToMklStream(yTR, engine, stream, y_user_md, op_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); /* - auto y_user_mem = dnnl::memory(y_user_md, engine, yTR->getBuffer()); + auto y_user_mem = dnnl::memory(y_user_md, engine, yTR->buffer()); const bool yReorder = op_prim_desc.weights_desc() != y_user_mem.get_desc(); auto y_mkl_mem = yReorder ? 
dnnl::memory(op_prim_desc.weights_desc(), engine) : y_user_mem; if (yReorder) @@ -201,7 +201,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b args[DNNL_ARG_WEIGHTS] = y_mkl_mem; */ // z - auto z_user_mem = dnnl::memory(z_user_md, engine, zR->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, zR->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -215,7 +215,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b stream.wait(); - if(zR->getBuffer() != z->getBuffer()) + if(zR->buffer() != z->buffer()) z->assign(zR); if(zR != z) diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp index b8e489c4c..bc79e6169 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp @@ -71,7 +71,7 @@ void setBlockStrides(const NDArray* array, dnnl::memory::desc& mklMd){ void loadDataToMklStream(const NDArray* array, const dnnl::engine& engine, const dnnl::stream& stream, const dnnl::memory::desc& user_md, const dnnl::memory::desc& primitive_md, dnnl::memory& arg) { - auto user_mem = dnnl::memory(user_md, engine, array->getBuffer()); + auto user_mem = dnnl::memory(user_md, engine,const_cast(array->buffer())); const bool bReorder = primitive_md != user_mem.get_desc(); auto mkl_mem = bReorder ? 
dnnl::memory(primitive_md, engine) : user_mem; if (bReorder) @@ -167,7 +167,7 @@ void poolingMKLDNN(const NDArray *input, NDArray *output, mkldnnUtils::loadDataToMklStream(input, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -285,7 +285,7 @@ void poolingBpMKLDNN(const NDArray *input, const NDArray *gradO, NDArray *gradI, mkldnnUtils::loadDataToMklStream(gradO, engine, stream, gradO_user_md, op_bp_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; @@ -322,7 +322,7 @@ void poolingBpMKLDNN(const NDArray *input, const NDArray *gradO, NDArray *gradI, void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const NDArray* dst, dnnl::memory::desc* lrn_src_md, dnnl::memory::desc* lrn_diff_src_md, dnnl::memory::desc* lrn_dst_md, dnnl::memory::desc* user_src_md, dnnl::memory::desc* user_diff_src_md, dnnl::memory::desc* user_dst_md, int axis) { - const Nd4jLong* shape = src->getShapeInfo(); + const Nd4jLong* shape = src->shapeInfo(); long rank = shape[0]; long dim1 = axis; // MKL-DNN supports only 1 axis, which has to be the "channel" one long dim2 = axis >= 2 ? 
1 : 2; @@ -333,7 +333,7 @@ void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const N auto format = axis == 1 ? dnnl::memory::format_tag::nchw : dnnl::memory::format_tag::nhwc; auto supposed_to_be_any_format = format; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && lrn_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && lrn_src_md != nullptr) { *lrn_src_md = dnnl::memory::desc({ lrn_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ lrn_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; @@ -343,7 +343,7 @@ void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const N user_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? src->stridesOf()[dim3] : 1; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && lrn_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && lrn_diff_src_md != nullptr) { *lrn_diff_src_md = dnnl::memory::desc({ lrn_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ lrn_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; @@ -353,7 +353,7 @@ void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const N user_diff_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? diff_src->stridesOf()[dim3] : 1; } - if (dst != nullptr && dst->getBuffer() != nullptr && lrn_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && lrn_dst_md != nullptr) { *lrn_dst_md = dnnl::memory::desc({ lrn_src_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ lrn_src_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; @@ -396,7 +396,7 @@ void getMKLDNNMemoryDescPool2d( auto format = isNCHW ? 
dnnl::memory::format_tag::nchw : dnnl::memory::format_tag::nhwc; auto supposed_to_be_any_format = dnnl::memory::format_tag::nChw8c; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && pool_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && pool_src_md != nullptr) { *pool_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCHW ? nchw : nhwc" @@ -406,7 +406,7 @@ void getMKLDNNMemoryDescPool2d( user_src_md->data.format_desc.blocking.strides[3] = src->stridesOf()[isNCHW ? 3 : 2]; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && pool_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && pool_diff_src_md != nullptr) { *pool_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCHW ? nchw : nhwc" @@ -416,7 +416,7 @@ void getMKLDNNMemoryDescPool2d( user_diff_src_md->data.format_desc.blocking.strides[3] = diff_src->stridesOf()[isNCHW ? 3 : 2]; } - if (dst != nullptr && dst->getBuffer() != nullptr && pool_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && pool_dst_md != nullptr) { *pool_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; // overrides "format = isNCHW ? nchw : nhwc" @@ -452,7 +452,7 @@ void getMKLDNNMemoryDescPool3d( auto format = isNCDHW ? 
dnnl::memory::format_tag::ncdhw : dnnl::memory::format_tag::ndhwc; auto supposed_to_be_any_format = dnnl::memory::format_tag::nCdhw8c; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && pool_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && pool_src_md != nullptr) { *pool_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCDHW ? ncdhw : ndhwc" @@ -463,7 +463,7 @@ void getMKLDNNMemoryDescPool3d( user_src_md->data.format_desc.blocking.strides[4] = src->stridesOf()[isNCDHW ? 4 : 3]; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && pool_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && pool_diff_src_md != nullptr) { *pool_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCDHW ? ncdhw : ndhwc" @@ -474,7 +474,7 @@ void getMKLDNNMemoryDescPool3d( user_diff_src_md->data.format_desc.blocking.strides[4] = diff_src->stridesOf()[isNCDHW ? 4 : 3]; } - if (dst != nullptr && dst->getBuffer() != nullptr && pool_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && pool_dst_md != nullptr) { *pool_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; // overrides "format = isNCDHW ? 
ncdhw : ndhwc" @@ -656,7 +656,7 @@ void getMKLDNNMemoryDescConv3d( void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, const NDArray* dst, dnnl::memory::desc* batchnorm_src_md, dnnl::memory::desc* batchnorm_diff_src_md, dnnl::memory::desc* batchnorm_dst_md, dnnl::memory::desc* user_src_md, dnnl::memory::desc* user_diff_src_md, dnnl::memory::desc* user_dst_md, int axis) { - const Nd4jLong* shape = src->getShapeInfo(); + const Nd4jLong* shape = src->shapeInfo(); Nd4jLong rank = shape[0]; Nd4jLong dim1 = axis; // MKL-DNN supports only 1 axis, which has to be the "channel" one Nd4jLong dim2 = axis >= 2 ? 1 : 2; @@ -667,7 +667,7 @@ void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, c auto format = dnnl::memory::format_tag::nchw; auto supposed_to_be_any_format = dnnl::memory::format_tag::nChw8c; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && batchnorm_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && batchnorm_src_md != nullptr) { *batchnorm_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; // overrides format @@ -677,7 +677,7 @@ void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, c user_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? 
src->stridesOf()[dim3] : 1; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && batchnorm_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && batchnorm_diff_src_md != nullptr) { *batchnorm_diff_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; // overrides format @@ -687,7 +687,7 @@ void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, c user_diff_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? diff_src->stridesOf()[dim3] : 1; } - if (dst != nullptr && dst->getBuffer() != nullptr && batchnorm_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && batchnorm_dst_md != nullptr) { *batchnorm_dst_md = dnnl::memory::desc({ batchnorm_src_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ batchnorm_src_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; // overrides format diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp index a178e84c2..932affbd3 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp @@ -83,7 +83,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? 
dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -191,7 +191,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_ff_prim_desc.src_desc(), argsff[DNNL_ARG_SRC]); // dLdx - auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->getBuffer()); + auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->buffer()); const bool dLdxReorder = op_ff_prim_desc.dst_desc() != dLdx_user_mem.get_desc(); auto dLdx_mkl_mem = dLdxReorder ? dnnl::memory(op_ff_prim_desc.dst_desc(), engine) : dLdx_user_mem; argsff[DNNL_ARG_DST] = dLdx_mkl_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp index fab32f280..53d75d0a9 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp @@ -71,7 +71,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -168,7 +168,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(dLdz, engine, stream, dLdz_user_md, op_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // dLdx - auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->getBuffer()); + auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->buffer()); const bool dLdxReorder = op_prim_desc.diff_src_desc() != dLdx_user_mem.get_desc(); auto dLdx_mkl_mem = dLdxReorder ? 
dnnl::memory(op_prim_desc.diff_src_desc(), engine) : dLdx_user_mem; args[DNNL_ARG_DIFF_SRC] = dLdx_mkl_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp index 01a003c2c..ab7f340ed 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp @@ -131,11 +131,11 @@ namespace sd { mkldnnUtils::loadDataToMklStream(weights, engine, stream, weights_user_md, op_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // bias - auto bias_mkl_mem = dnnl::memory(bias_mkl_md, engine, bias->getBuffer()); + auto bias_mkl_mem = dnnl::memory(bias_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = bias_mkl_mem; // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -266,19 +266,19 @@ namespace sd { mkldnnUtils::loadDataToMklStream(weights, engine, stream, weights_user_md, op_bpdx_prim_desc.weights_desc(), argsDx[DNNL_ARG_WEIGHTS]); // dLdw - auto dLdw_user_mem = dnnl::memory(dLdw_user_md, engine, dLdw->getBuffer()); + auto dLdw_user_mem = dnnl::memory(dLdw_user_md, engine, dLdw->buffer()); const bool dLdwReorder = op_bpdw_prim_desc.diff_weights_desc() != dLdw_user_mem.get_desc(); auto dLdw_mkl_mem = dLdwReorder ? dnnl::memory(op_bpdw_prim_desc.diff_weights_desc(), engine) : dLdw_user_mem; argsDw[DNNL_ARG_DIFF_WEIGHTS] = dLdw_mkl_mem; // dLdx - auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->getBuffer()); + auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->buffer()); const bool dLdxReorder = op_bpdx_prim_desc.diff_src_desc() != dLdx_user_mem.get_desc(); auto dLdx_mkl_mem = dLdxReorder ? 
dnnl::memory(op_bpdx_prim_desc.diff_src_desc(), engine) : dLdx_user_mem; argsDx[DNNL_ARG_DIFF_SRC] = dLdx_mkl_mem; // dLdb - auto dLdb_user_mem = dnnl::memory(dLdb_user_md, engine, dLdb->getBuffer()); + auto dLdb_user_mem = dnnl::memory(dLdb_user_md, engine, dLdb->buffer()); const bool dLdbReorder = op_bpdw_prim_desc.diff_bias_desc() != dLdb_user_mem.get_desc(); auto dLdb_mkl_mem = dLdbReorder ? dnnl::memory(op_bpdw_prim_desc.diff_bias_desc(), engine) : dLdb_user_mem; argsDw[DNNL_ARG_DIFF_BIAS] = dLdb_mkl_mem; diff --git a/libnd4j/include/ops/impl/specials_double.hpp b/libnd4j/include/ops/impl/specials_double.hpp index 96f7d2db2..1eaf3fbc0 100644 --- a/libnd4j/include/ops/impl/specials_double.hpp +++ b/libnd4j/include/ops/impl/specials_double.hpp @@ -50,7 +50,7 @@ namespace sd { template - void quickSort_parallel_internal_key(X* key, Nd4jLong *xShapeInfo, Y* values, Nd4jLong *yShapeInfo, int left, int right, int cutoff, bool descending) { + void quickSort_parallel_internal_key(X* key, Nd4jLong const* xShapeInfo, Y* values, Nd4jLong const* yShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; X ktmp; X pivot = key[shape::getIndexOffset((left + right) / 2, xShapeInfo)]; @@ -115,7 +115,7 @@ PRAGMA_OMP_TASK template - void quickSort_parallel_internal_value(X* key, Nd4jLong *xShapeInfo, Y* value, Nd4jLong *yShapeInfo, int left, int right, int cutoff, bool descending) { + void quickSort_parallel_internal_value(X* key, Nd4jLong const* xShapeInfo, Y* value, Nd4jLong const* yShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; X ktmp; Y pivot = value[shape::getIndexOffset((left + right) / 2, yShapeInfo)]; @@ -180,7 +180,7 @@ PRAGMA_OMP_TASK template - static void quickSort_parallel_key(void *varray, Nd4jLong *xShapeInfo, void *yarray, Nd4jLong *yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ + static void quickSort_parallel_key(void *varray, Nd4jLong const* xShapeInfo, void *yarray, 
Nd4jLong const* yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); auto values = reinterpret_cast(yarray); int cutoff = 1000; @@ -195,7 +195,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - static void quickSort_parallel_value(void *varray, Nd4jLong *xShapeInfo, void *yarray, Nd4jLong *yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ + static void quickSort_parallel_value(void *varray, Nd4jLong const* xShapeInfo, void *yarray, Nd4jLong const* yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); auto values = reinterpret_cast(yarray); int cutoff = 1000; @@ -210,17 +210,17 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - void DoubleMethods::sortByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending) { + void DoubleMethods::sortByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending) { quickSort_parallel_key(vx, xShapeInfo, vy, yShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template - void DoubleMethods::sortByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending) { + void DoubleMethods::sortByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending) { quickSort_parallel_value(vx, xShapeInfo, vy, yShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template - void DoubleMethods::sortTadByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending) { + void DoubleMethods::sortTadByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending) { auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); @@ -244,7 +244,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - void DoubleMethods::sortTadByValue(void *vx, Nd4jLong 
*xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending) { + void DoubleMethods::sortTadByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending) { auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); diff --git a/libnd4j/include/ops/impl/specials_single.hpp b/libnd4j/include/ops/impl/specials_single.hpp index c9a48f049..9a700251c 100644 --- a/libnd4j/include/ops/impl/specials_single.hpp +++ b/libnd4j/include/ops/impl/specials_single.hpp @@ -100,7 +100,7 @@ namespace sd { // auto func = PRAGMA_THREADS_FOR { // for (auto i = start; i < stop; i += increment) { // auto temp = output(indices[i], true); -// sd::TransformLoops::template loopTransform>( inArrs[i]->bufferAsT(), inArrs[i]->getShapeInfo(), temp.bufferAsT(), temp.getShapeInfo(), nullptr, 0, 1); +// sd::TransformLoops::template loopTransform>( inArrs[i]->bufferAsT(), inArrs[i]->shapeInfo(), temp.bufferAsT(), temp.shapeInfo(), nullptr, 0, 1); // } // }; @@ -150,7 +150,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr // if(!areInputsContin || !allSameOrder) // break; - // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->getShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->shapeInfo()); // } // } @@ -158,7 +158,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr // if(luckCase2) { // for example {2,1,3} + {2,5,3} + {2,10,3} = {2,16,3}, here axis 1 shoud have stride = 1 for all inputs arrays and output array - // const auto zStep = shape::strideOverContigAxis(axis, output.getShapeInfo()); + // const auto zStep = shape::strideOverContigAxis(axis, output.shapeInfo()); // for (uint i = 0; i < output.lengthOf() / output.sizeAt(axis); ++i) { @@ -182,9 +182,9 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr for (auto i = start; i < stop; i += increment) { - 
shape::index2coordsCPU(start, i, output.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, output.shapeInfo(), coords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), coords); + const auto zOffset = shape::getOffset(output.shapeInfo(), coords); uint inArrIdx = 0; uint xDim = inArrs[inArrIdx]->sizeAt(axis); @@ -196,7 +196,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr } const T* x = inArrs[inArrIdx]->bufferAsT(); - const auto xOffset = shape::getOffset(inArrs[inArrIdx]->getShapeInfo(), coords); + const auto xOffset = shape::getOffset(inArrs[inArrIdx]->shapeInfo(), coords); zBuff[zOffset] = x[xOffset]; @@ -212,11 +212,11 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr * along a particular dimension */ template -void SpecialMethods::concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *vresult, Nd4jLong *resultShapeInfo) { +void SpecialMethods::concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *vresult, Nd4jLong const* resultShapeInfo) { auto result = reinterpret_cast(vresult); std::vector inputs(numArrays); - NDArray output(static_cast(result), static_cast(resultShapeInfo)); + NDArray output(static_cast(result), resultShapeInfo); for(int i = 0; i < numArrays; ++i) inputs[i] = new NDArray(static_cast(data[i]), static_cast(inputShapeInfo[i])); @@ -235,7 +235,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< const auto sizeofT = input.sizeOfT(); - T* xBuff = input.bufferAsT(); + auto xBuff = input.bufferAsT(); bool luckCase1 = ((axis == 0 && input.ordering() == 'c') || (axis == input.rankOf() - 1 && input.ordering() == 'f')) && input.ews() == 1; @@ -272,7 +272,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< // if (!areOutsContin || !allSameOrder) // break; - // strideOfContigStride[i] = shape::strideOverContigAxis(axis, 
outArrs[i]->getShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, outArrs[i]->shapeInfo()); // } // } @@ -280,7 +280,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< // if (luckCase2) { - // const auto xStep = shape::strideOverContigAxis(axis, input.getShapeInfo()); + // const auto xStep = shape::strideOverContigAxis(axis, input.shapeInfo()); // for (uint i = 0; i < input.lengthOf() / input.sizeAt(axis); ++i) { @@ -306,8 +306,8 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< for (auto i = start; i < stop; i += increment) { - shape::index2coordsCPU(start, i, input.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(input.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, input.shapeInfo(), coords); + const auto xOffset = shape::getOffset(input.shapeInfo(), coords); uint outArrIdx = 0; temp = coords[axis]; @@ -318,7 +318,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } T* z = outArrs[outArrIdx]->bufferAsT(); - const auto zOffset = shape::getOffset(outArrs[outArrIdx]->getShapeInfo(), coords); + const auto zOffset = shape::getOffset(outArrs[outArrIdx]->shapeInfo(), coords); z[zOffset] = xBuff[xOffset]; coords[axis] = temp; @@ -339,7 +339,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< * @param length */ template - void SpecialMethods::accumulateGeneric(void **vx, void *vz, Nd4jLong *zShapeInfo, int n, const Nd4jLong length) { + void SpecialMethods::accumulateGeneric(void **vx, void *vz, Nd4jLong const* zShapeInfo, int n, const Nd4jLong length) { auto z = reinterpret_cast(vz); auto x = reinterpret_cast(vx); @@ -366,7 +366,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< * @param propagate */ template - void SpecialMethods::averageGeneric(void **vx, void *vz, Nd4jLong *zShapeInfo, int n, const Nd4jLong length, bool propagate) { + void 
SpecialMethods::averageGeneric(void **vx, void *vz, Nd4jLong const* zShapeInfo, int n, const Nd4jLong length, bool propagate) { auto z = reinterpret_cast(vz); auto x = reinterpret_cast(vx); @@ -416,7 +416,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } template - Nd4jLong SpecialMethods::getPosition(Nd4jLong *xShapeInfo, Nd4jLong index) { + Nd4jLong SpecialMethods::getPosition(Nd4jLong const* xShapeInfo, Nd4jLong index) { auto xEWS = shape::elementWiseStride(xShapeInfo); if (xEWS == 1) @@ -428,7 +428,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } template - void SpecialMethods::quickSort_parallel_internal(T* array, Nd4jLong *xShapeInfo, int left, int right, int cutoff, bool descending) { + void SpecialMethods::quickSort_parallel_internal(T* array, Nd4jLong const* xShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; T tmp; @@ -482,7 +482,7 @@ PRAGMA_OMP_TASK } template - void SpecialMethods::quickSort_parallel(void *varray, Nd4jLong *xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ + void SpecialMethods::quickSort_parallel(void *varray, Nd4jLong const* xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); int cutoff = 1000; @@ -521,14 +521,14 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) template - void SpecialMethods::sortGeneric(void *vx, Nd4jLong *xShapeInfo, bool descending) { + void SpecialMethods::sortGeneric(void *vx, Nd4jLong const* xShapeInfo, bool descending) { auto x = reinterpret_cast(vx); quickSort_parallel(x, xShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template - void SpecialMethods::sortTadGeneric(void *vx, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { + void SpecialMethods::sortTadGeneric(void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* 
tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { auto x = reinterpret_cast(vx); //quickSort_parallel(x, xShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); @@ -548,9 +548,9 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) template - void SpecialMethods::decodeBitmapGeneric(void *dx, Nd4jLong N, void *vz, Nd4jLong *zShapeInfo) { + void SpecialMethods::decodeBitmapGeneric(const void *dx, Nd4jLong N, void *vz, Nd4jLong const* zShapeInfo) { auto dz = reinterpret_cast(vz); - auto x = reinterpret_cast(dx); + auto x = reinterpret_cast(dx); Nd4jLong lim = N / 16 + 5; FloatBits2 fb; @@ -585,7 +585,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - Nd4jLong SpecialMethods::encodeBitmapGeneric(void *vx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold) { + Nd4jLong SpecialMethods::encodeBitmapGeneric(void *vx, Nd4jLong const* xShapeInfo, Nd4jLong N, int *dz, float threshold) { auto dx = reinterpret_cast(vx); const T two(2.0f); const T zero(0.0f); diff --git a/libnd4j/include/ops/ops.h b/libnd4j/include/ops/ops.h index 2f02af11b..21cd07c40 100644 --- a/libnd4j/include/ops/ops.h +++ b/libnd4j/include/ops/ops.h @@ -37,21 +37,21 @@ #define DOUBLE_PI_T T(2.0 * 3.14159265358979323846) #define DOUBLE_PI_X X(2.0 * 3.14159265358979323846) -#define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define 
no_op_exec_special static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, Z *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} -#define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} -#define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} +#define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special static const bool requiresSpecial = false; static void 
execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, Z *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} +#define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} +#define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} #ifdef __CUDACC__ -#define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_cuda static __device__ 
void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer,Z *result, Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} +#define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_cuda static __device__ void 
execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer,Z *result, const Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} #else // hacky fix for isnan/being being out of scope @@ -4017,7 +4017,7 @@ namespace simdOps { return 0; } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; @@ -4064,7 +4064,7 @@ namespace simdOps { return -sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = -1; @@ -4123,7 +4123,7 @@ namespace simdOps { return 
-sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = -1; @@ -4191,7 +4191,7 @@ namespace simdOps { return -sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; @@ -4217,7 +4217,7 @@ namespace simdOps { return sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; @@ -4273,7 +4273,7 @@ namespace simdOps { return sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; diff --git a/libnd4j/include/ops/random_ops.h b/libnd4j/include/ops/random_ops.h index 939ffa975..d738589a7 100644 --- a/libnd4j/include/ops/random_ops.h +++ b/libnd4j/include/ops/random_ops.h @@ -32,10 +32,10 @@ #define method_X random_def T op(T valueX, Nd4jLong idx, Nd4jLong length, sd::graph::RandomGenerator* rng, T *extraParams) { return -2.0f; } #define method_XY random_def T op(T valueX, T valueY, Nd4jLong idx, Nd4jLong length, sd::graph::RandomGenerator* rng, T *extraParams) { return -3.0f; } -#define no_exec_special static const bool requiresSpecial = false; static 
inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { } +#define no_exec_special static const bool requiresSpecial = false; static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { } #ifdef __CUDACC__ -#define no_exec_special_cuda __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { } +#define no_exec_special_cuda __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { } #else #define no_exec_special_cuda #endif diff --git a/libnd4j/include/ops/special_random_ops.h b/libnd4j/include/ops/special_random_ops.h index 50a50752e..08808e67c 100644 --- a/libnd4j/include/ops/special_random_ops.h +++ b/libnd4j/include/ops/special_random_ops.h @@ -42,7 +42,7 @@ namespace randomOps { #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { /** * X holds data, * Y holds probabilities @@ -141,7 +141,7 @@ namespace randomOps { } #endif - static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong 
*zShapeBuffer, T *extraArguments) { /** * X holds data, * Y holds probabilities @@ -230,7 +230,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const *yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { __shared__ T epsilon; __shared__ T two_pi; @@ -304,7 +304,7 @@ namespace randomOps { static inline void - specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { const T two_pi = static_cast(2.0f) * static_cast(3.14159265358979323846); auto zLength = shape::length(zShapeBuffer); @@ -373,7 +373,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; T prob = extraArguments[1]; @@ -424,7 +424,7 @@ namespace randomOps { } #endif - static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T 
*z, const Nd4jLong *zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; Nd4jLong zLength = shape::length(zShapeBuffer); @@ -480,7 +480,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; T prob = extraArguments[1]; @@ -532,7 +532,7 @@ namespace randomOps { } #endif - static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; Nd4jLong zLength = shape::length(zShapeBuffer); @@ -546,8 +546,7 @@ namespace randomOps { T prob = extraArguments[1]; - //sd::random::RandomBuffer *buffer = reinterpret_cast (state); - sd::graph::RandomGenerator* rng = reinterpret_cast(state); + auto rng = reinterpret_cast(state); auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { @@ -606,7 +605,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { __shared__ T epsilon; 
__shared__ T two_pi; @@ -673,12 +672,12 @@ namespace randomOps { #endif static inline void - specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { GaussianDistribution::specialOp(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments); Nd4jLong zLength = shape::length(zShapeBuffer); //auto yEWS = shape::elementWiseStride(yShapeBuffer); //auto zEWS = shape::elementWiseStride(zShapeBuffer); - sd::graph::RandomGenerator* rng = reinterpret_cast(state); + auto rng = reinterpret_cast(state); T mean = extraArguments[0]; T stddev = extraArguments[1]; T ds = sd::math::nd4j_abs(stddev) * (T) 2.0f; @@ -718,7 +717,7 @@ namespace randomOps { #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { __shared__ T epsilon; __shared__ T two_pi; @@ -791,7 +790,7 @@ namespace randomOps { #endif static inline void - specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { const T two_pi = static_cast(2.0f) * static_cast(3.14159265358979323846); Nd4jLong zLength = shape::length(zShapeBuffer); @@ -809,8 +808,7 @@ namespace randomOps { // we're enforcing even chunks, since it's mandatory for this algorithm span -= span % 2; -// 
auto buffer = reinterpret_cast (state); - sd::graph::RandomGenerator* rng = reinterpret_cast(state); + auto rng = reinterpret_cast(state); const T mean = extraArguments[0]; const T stddev = extraArguments[1]; diff --git a/libnd4j/include/ops/specials.h b/libnd4j/include/ops/specials.h index c250d72f6..ed5f8fb8c 100644 --- a/libnd4j/include/ops/specials.h +++ b/libnd4j/include/ops/specials.h @@ -50,36 +50,36 @@ namespace sd { template class ND4J_EXPORT SpecialMethods { public: - static void concatCpuGeneric(const std::vector& inArrs, NDArray& output, const int axis); - static void concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *result, Nd4jLong *resultShapeInfo); - static void splitCpuGeneric(const NDArray& input, const std::vector& outArrs, const int axis); - static void accumulateGeneric(void **x, void *z, Nd4jLong *zShapeInfo, int n, const Nd4jLong length); - static void averageGeneric(void **x, void *z, Nd4jLong *zShapeInfo, int n, const Nd4jLong length, bool propagate); + static void concatCpuGeneric(const std::vector& inArrs, NDArray& output, int axis); + static void concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *result, Nd4jLong const* resultShapeInfo); + static void splitCpuGeneric(const NDArray& input, const std::vector& outArrs, int axis); + static void accumulateGeneric(void **x, void *z, const Nd4jLong *zShapeInfo, int n, Nd4jLong length); + static void averageGeneric(void **x, void *z, const Nd4jLong *zShapeInfo, int n, Nd4jLong length, bool propagate); - static Nd4jLong getPosition(Nd4jLong *xShapeInfo, Nd4jLong index); - static void quickSort_parallel_internal(T* array, Nd4jLong *xShapeInfo, int left, int right, int cutoff, bool descending); - static void quickSort_parallel(void* array, Nd4jLong *xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending); + static Nd4jLong getPosition(const Nd4jLong *xShapeInfo, Nd4jLong index); + static 
void quickSort_parallel_internal(T* array, const Nd4jLong *xShapeInfo, int left, int right, int cutoff, bool descending); + static void quickSort_parallel(void* array, const Nd4jLong *xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending); static int nextPowerOf2(int number); static int lastPowerOf2(int number); - static void sortGeneric(void *x, Nd4jLong *xShapeInfo, bool descending); - static void sortTadGeneric(void *x, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); + static void sortGeneric(void *x, const Nd4jLong *xShapeInfo, bool descending); + static void sortTadGeneric(void *x, const Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, bool descending); - static void decodeBitmapGeneric(void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); - static Nd4jLong encodeBitmapGeneric(void *dx, Nd4jLong *zShapeInfo, Nd4jLong N, int *dz, float threshold); + static void decodeBitmapGeneric(const void *dx, Nd4jLong N, void *dz, const Nd4jLong *zShapeInfo); + static Nd4jLong encodeBitmapGeneric(void *dx, const Nd4jLong *zShapeInfo, Nd4jLong N, int *dz, float threshold); }; template class ND4J_EXPORT DoubleMethods{ public: - static void sortByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending); - static void sortByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending); + static void sortByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending); + static void sortByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending); - static void sortTadByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending); - static void sortTadByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int 
dimensionLength, bool descending); + static void sortTadByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending); + static void sortTadByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending); }; } diff --git a/libnd4j/include/ops/specials_cuda.h b/libnd4j/include/ops/specials_cuda.h index bdff91dd0..a12fd302f 100644 --- a/libnd4j/include/ops/specials_cuda.h +++ b/libnd4j/include/ops/specials_cuda.h @@ -28,39 +28,39 @@ //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending); +__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending); +__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending); +__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void 
bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending); +__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending); +__host__ void bitonicSortStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicArbitraryStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending); +__host__ void bitonicArbitraryStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); +__host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending); template -__host__ void oesTadGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong 
*xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); +__host__ void oesTadGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending); template -__host__ void oesTadGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); +__host__ void oesTadGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending); //////////////////////////////////////////////////////////////////////// template diff --git a/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp index 51c6e2375..5a4db9fb8 100644 --- a/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp @@ -195,8 +195,8 @@ TEST_F(BroadcastableOpsTests, Test_Shape_1) { TEST_F(BroadcastableOpsTests, Test_Shape_2) { sd::ops::minimum op; - Nd4jLong shapeX[] = {2, 1, 1, 1, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 2, 5, 5, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {2, 1, 1, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 2, 5, 5, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); @@ -213,8 +213,8 @@ TEST_F(BroadcastableOpsTests, Test_Shape_2) { TEST_F(BroadcastableOpsTests, Test_Shape_3) { sd::ops::minimum op; - Nd4jLong shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 1, 3, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {2, 5, 
3, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 1, 3, 3, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); @@ -231,8 +231,8 @@ TEST_F(BroadcastableOpsTests, Test_Shape_3) { TEST_F(BroadcastableOpsTests, Test_Shape_4) { sd::ops::minimum op; - Nd4jLong shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 5, 1, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 5, 1, 1, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); @@ -250,9 +250,9 @@ TEST_F(BroadcastableOpsTests, Test_Shape_4) { TEST_F(BroadcastableOpsTests, Test_Shape_5) { sd::ops::minimum op; - Nd4jLong shapeX[] = {3, 2, 1, 3, 3, 3, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 4, 3, 3, 1, 8192, 1, 99}; - Nd4jLong shapeE[] = {3, 2, 4, 3, 12, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {3, 2, 1, 3, 3, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 4, 3, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeE[] = {3, 2, 4, 3, 12, 3, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); diff --git a/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp b/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp index ed97c3137..34d0132bb 100644 --- a/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp @@ -37,7 +37,7 @@ public: #ifndef __CUDABLAS__ TEST_F(BroadcastMultiDimTest,MultimDimTest) { - shape::TAD *tad = new shape::TAD(); + auto tad = new shape::TAD(); tad->init(inputShapeBuffer,dimensions,dimensionLength); tad->createTadOnlyShapeInfo(); tad-> createOffsets(); @@ -55,6 +55,7 @@ TEST_F(BroadcastMultiDimTest,MultimDimTest) { tad->tadOffsets, //tadOffset tad->tadOnlyShapeInfo, //tadShapeInfoZ tad->tadOffsets, sd::LoopKind::COMMON, 0, tad->numTads); //tadOffsetZ + for(int i = 0; i < 30; i++) { ASSERT_EQ(dataAssertion[i],result[i]); } diff --git 
a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp index 149ab3c5f..4438e5fe6 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp @@ -574,7 +574,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2D_BP_Bias_1) { TypeParam _expEpsB[] = {952.0, 1540.0, 1636.0, 1180.0, 1791.0, 2886.0, 3057.0, 2193.0, 2223.0, 3570.0, 3741.0, 2673.0, 1900.0, 3028.0, 3160.0, 2240.0, 2872.0, 4612.0, 4708.0, 3356.0, 5247.0, 8358.0, 8529.0, 6033.0, 5679.0, 9042.0, 9213.0, 6513.0, 4588.0, 7252.0, 7384.0, 5184.0}; - NDArray expEps(_expEpsB, input.getShapeInfo()); + NDArray expEps(_expEpsB, input.shapeInfo()); input.linspace(1); weights.linspace(1); @@ -624,7 +624,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2D_BP_NoBias_1) { TypeParam _expEpsB[] = {952.0, 1540.0, 1636.0, 1180.0, 1791.0, 2886.0, 3057.0, 2193.0, 2223.0, 3570.0, 3741.0, 2673.0, 1900.0, 3028.0, 3160.0, 2240.0, 2872.0, 4612.0, 4708.0, 3356.0, 5247.0, 8358.0, 8529.0, 6033.0, 5679.0, 9042.0, 9213.0, 6513.0, 4588.0, 7252.0, 7384.0, 5184.0}; - NDArray expEps(_expEpsB, input.getShapeInfo()); + NDArray expEps(_expEpsB, input.shapeInfo()); input.linspace(1); weights.linspace(1); @@ -2413,7 +2413,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2D_input_BP_test1) { TypeParam _expEpsB[] = {952.0, 1540.0, 1636.0, 1180.0, 1791.0, 2886.0, 3057.0, 2193.0, 2223.0, 3570.0, 3741.0, 2673.0, 1900.0, 3028.0, 3160.0, 2240.0, 2872.0, 4612.0, 4708.0, 3356.0, 5247.0, 8358.0, 8529.0, 6033.0, 5679.0, 9042.0, 9213.0, 6513.0, 4588.0, 7252.0, 7384.0, 5184.0}; - NDArray expEps(_expEpsB, shapeArr.getShapeInfo()); + NDArray expEps(_expEpsB, shapeArr.shapeInfo()); weights.linspace(1); epsilonNext.linspace(1); diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu index a0104e637..972435523 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu +++ 
b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu @@ -147,10 +147,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { cudaResult = cudaMalloc(reinterpret_cast(&dX2), x2.lengthOf() * x2.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dX3), x3.lengthOf() * x3.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dZ), scalar.lengthOf() * scalar.sizeOfT()); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX2ShapeInfo), shape::shapeInfoByteLength(x2.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dZShapeInfo), shape::shapeInfoByteLength(scalar.getShapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX2ShapeInfo), shape::shapeInfoByteLength(x2.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dZShapeInfo), shape::shapeInfoByteLength(scalar.shapeInfo())); ASSERT_EQ(0, cudaResult); cudaStream_t stream; cudaResult = cudaStreamCreate(&stream); @@ -164,10 +164,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { cudaMemcpyAsync(dX1, x1.buffer(), x1.lengthOf() * x1.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX2, x2.buffer(), x2.lengthOf() * x2.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX3, x3.buffer(), x3.lengthOf() * x3.sizeOfT(), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX1ShapeInfo, x1.getShapeInfo(), 
shape::shapeInfoByteLength(x1.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX2ShapeInfo, x2.getShapeInfo(), shape::shapeInfoByteLength(x2.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX3ShapeInfo, x3.getShapeInfo(), shape::shapeInfoByteLength(x3.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dZShapeInfo, scalar.getShapeInfo(), shape::shapeInfoByteLength(scalar.getShapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX1ShapeInfo, x1.shapeInfo(), shape::shapeInfoByteLength(x1.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX2ShapeInfo, x2.shapeInfo(), shape::shapeInfoByteLength(x2.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX3ShapeInfo, x3.shapeInfo(), shape::shapeInfoByteLength(x3.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dZShapeInfo, scalar.shapeInfo(), shape::shapeInfoByteLength(scalar.shapeInfo()), cudaMemcpyHostToDevice, stream); void* reductionPointer = nullptr; cudaResult = cudaMalloc(reinterpret_cast(&reductionPointer), 1024*1024); @@ -181,10 +181,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { NativeOpExecutioner::execIndexReduceScalar(&lc, sd::indexreduce::IndexAbsoluteMax, - x1.buffer(), x1.getShapeInfo(), + x1.buffer(), x1.shapeInfo(), dX1, dX1ShapeInfo, nullptr, - scalar.buffer(), scalar.getShapeInfo(), + scalar.buffer(), scalar.shapeInfo(), dZ, dZShapeInfo); cudaResult = cudaStreamSynchronize(stream); @@ -203,10 +203,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { NativeOpExecutioner::execIndexReduceScalar(&lc, sd::indexreduce::IndexAbsoluteMax, - nullptr, x2.getShapeInfo(), + nullptr, x2.shapeInfo(), dX2, dX2ShapeInfo, nullptr, - nullptr, scalar.getShapeInfo(), + nullptr, scalar.shapeInfo(), dZ, dZShapeInfo); cudaResult = cudaStreamSynchronize(stream); @@ -223,10 +223,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { NativeOpExecutioner::execIndexReduceScalar(&lc, 
sd::indexreduce::IndexAbsoluteMax, - nullptr, x3.getShapeInfo(), + nullptr, x3.shapeInfo(), dX3, dX3ShapeInfo, nullptr, - nullptr, scalar.getShapeInfo(), + nullptr, scalar.shapeInfo(), dZ, dZShapeInfo); cudaResult = cudaStreamSynchronize(stream); @@ -279,10 +279,10 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { cudaResult = cudaMalloc(reinterpret_cast(&dX4), x4.lengthOf() * x4.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dZ1), scalar1.lengthOf() * scalar1.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dZ2), scalar2.lengthOf() * scalar2.sizeOfT()); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dZ1ShapeInfo), shape::shapeInfoByteLength(scalar1.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dZ2ShapeInfo), shape::shapeInfoByteLength(scalar2.getShapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dZ1ShapeInfo), shape::shapeInfoByteLength(scalar1.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dZ2ShapeInfo), shape::shapeInfoByteLength(scalar2.shapeInfo())); ASSERT_EQ(0, cudaResult); cudaStream_t stream; cudaResult = cudaStreamCreate(&stream); @@ -299,10 +299,10 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { cudaMemcpyAsync(dX2, x2.buffer(), x2.lengthOf() * x2.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX3, x3.buffer(), x3.lengthOf() 
* x3.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX4, x4.buffer(), x4.lengthOf() * x4.sizeOfT(), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX1ShapeInfo, x1.getShapeInfo(), shape::shapeInfoByteLength(x1.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX3ShapeInfo, x3.getShapeInfo(), shape::shapeInfoByteLength(x3.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dZ1ShapeInfo, scalar1.getShapeInfo(), shape::shapeInfoByteLength(scalar1.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dZ2ShapeInfo, scalar2.getShapeInfo(), shape::shapeInfoByteLength(scalar2.getShapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX1ShapeInfo, x1.shapeInfo(), shape::shapeInfoByteLength(x1.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX3ShapeInfo, x3.shapeInfo(), shape::shapeInfoByteLength(x3.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dZ1ShapeInfo, scalar1.shapeInfo(), shape::shapeInfoByteLength(scalar1.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dZ2ShapeInfo, scalar2.shapeInfo(), shape::shapeInfoByteLength(scalar2.shapeInfo()), cudaMemcpyHostToDevice, stream); /***************************************/ @@ -316,7 +316,7 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { /***************************************/ - NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x1.getShapeInfo(),dX1, dX1ShapeInfo, nullptr, nullptr, x2.getShapeInfo(),dX2, dX1ShapeInfo,nullptr, scalar1.getShapeInfo(),dZ1, dZ1ShapeInfo); + NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x1.shapeInfo(),dX1, dX1ShapeInfo, nullptr, nullptr, x2.shapeInfo(),dX2, dX1ShapeInfo,nullptr, scalar1.shapeInfo(),dZ1, dZ1ShapeInfo); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -333,7 +333,7 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { /***************************************/ - 
NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x3.getShapeInfo(),dX3, dX3ShapeInfo, nullptr, nullptr, x4.getShapeInfo(),dX4, dX3ShapeInfo,nullptr, scalar2.getShapeInfo(),dZ2, dZ2ShapeInfo); + NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x3.shapeInfo(),dX3, dX3ShapeInfo, nullptr, nullptr, x4.shapeInfo(),dX4, dX3ShapeInfo,nullptr, scalar2.shapeInfo(),dZ2, dZ2ShapeInfo); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -387,10 +387,10 @@ TEST_F(CudaBasicsTests1, execReduce3_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), nullptr, nullptr, nullptr, nullptr); @@ -436,10 +436,10 @@ TEST_F(CudaBasicsTests1, execReduce3_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), nullptr, nullptr, nullptr, nullptr); @@ -471,13 +471,13 @@ TEST_F(CudaBasicsTests1, execReduce3_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + 
xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -502,10 +502,10 @@ TEST_F(CudaBasicsTests1, execReduce3_3) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -537,13 +537,13 @@ TEST_F(CudaBasicsTests1, execReduce3_4) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -568,10 +568,10 @@ TEST_F(CudaBasicsTests1, execReduce3_4) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), 
z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -603,13 +603,13 @@ TEST_F(CudaBasicsTests1, execReduce3_5) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -634,10 +634,10 @@ TEST_F(CudaBasicsTests1, execReduce3_5) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -669,13 +669,13 @@ TEST_F(CudaBasicsTests1, execReduce3All_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), 
dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -700,10 +700,10 @@ TEST_F(CudaBasicsTests1, execReduce3All_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3All(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -735,13 +735,13 @@ TEST_F(CudaBasicsTests1, execReduce3All_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -766,10 +766,10 @@ TEST_F(CudaBasicsTests1, execReduce3All_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3All(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), 
(Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -800,7 +800,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -823,9 +823,9 @@ TEST_F(CudaBasicsTests1, execIndexReduce_1) { // call cuda kernel which calculates result NativeOpExecutioner::execIndexReduce(&lc, sd::indexreduce::IndexMax, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -861,7 +861,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -885,9 +885,9 @@ TEST_F(CudaBasicsTests1, execIndexReduce_2) { // call cuda kernel which calculates result NativeOpExecutioner::execIndexReduce(&lc, sd::indexreduce::IndexMax, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -923,7 +923,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + 
xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -946,9 +946,9 @@ TEST_F(CudaBasicsTests1, execIndexReduce_3) { // call cuda kernel which calculates result NativeOpExecutioner::execIndexReduce(&lc, sd::indexreduce::IndexMax, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -986,9 +986,9 @@ TEST_F(CudaBasicsTests1, execScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execScalar(&lc, sd::scalar::Divide, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalar.getShapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1021,9 +1021,9 @@ TEST_F(CudaBasicsTests1, execScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execScalar(&lc, sd::scalar::CopyPws, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalar.getShapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), nullptr); cudaResult = 
cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1053,7 +1053,7 @@ TEST_F(CudaBasicsTests1, execScalar_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1076,10 +1076,10 @@ TEST_F(CudaBasicsTests1, execScalar_3) { // call cuda kernel which calculates result NativeOpExecutioner::execScalar(&lc, sd::scalar::Divide, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalars.getShapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalars.shapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1116,9 +1116,9 @@ TEST_F(CudaBasicsTests1, execScalarBool_1) { // call cuda kernel which calculates result // call cuda kernel which calculates result NativeOpExecutioner::execScalarBool(&lc, sd::scalar::GreaterThan, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalar.getShapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1144,7 +1144,7 @@ TEST_F(CudaBasicsTests1, execScalarBool_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + 
xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1166,10 +1166,10 @@ TEST_F(CudaBasicsTests1, execScalarBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execScalarBool(&lc, sd::scalar::GreaterThan, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalars.getShapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalars.shapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1205,7 +1205,7 @@ TEST_F(CudaBasicsTests1, execBroadcast_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1227,9 +1227,9 @@ TEST_F(CudaBasicsTests1, execBroadcast_1) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(&lc, sd::broadcast::Add, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1265,7 +1265,7 @@ TEST_F(CudaBasicsTests1, execBroadcast_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), 
dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1287,9 +1287,9 @@ TEST_F(CudaBasicsTests1, execBroadcast_2) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(&lc, sd::broadcast::Add, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1322,7 +1322,7 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1344,9 +1344,9 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_1) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcastBool(&lc, sd::broadcast::EqualTo, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], @@ -1380,7 +1380,7 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), 
dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1403,9 +1403,9 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcastBool(&lc, sd::broadcast::EqualTo, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], @@ -1447,9 +1447,9 @@ TEST_F(CudaBasicsTests1, execPairwiseTransform_1) { // call cuda kernel which calculates result NativeOpExecutioner::execPairwiseTransform(&lc, sd::pairwise::Subtract, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1481,9 +1481,9 @@ TEST_F(CudaBasicsTests1, execPairwiseBoolTransform_1) { // call cuda kernel which calculates result NativeOpExecutioner::execPairwiseBoolTransform(&lc, sd::pairwise::EqualTo, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, 
y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1515,8 +1515,8 @@ TEST_F(CudaBasicsTests1, execTransformFloat_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformFloat(&lc, sd::transform::Sqrt, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1545,8 +1545,8 @@ TEST_F(CudaBasicsTests1, execTransformFloat_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformFloat(&lc, sd::transform::Sqrt, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1576,8 +1576,8 @@ TEST_F(CudaBasicsTests1, execTransformAny_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformAny(&lc, sd::transform::Assign, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1606,8 +1606,8 @@ TEST_F(CudaBasicsTests1, execTransformAny_2) { // call cuda kernel which calculates result 
NativeOpExecutioner::execTransformAny(&lc, sd::transform::Assign, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1637,8 +1637,8 @@ TEST_F(CudaBasicsTests1, execTransformStrict_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformStrict(&lc, sd::transform::CubeDerivative, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1667,8 +1667,8 @@ TEST_F(CudaBasicsTests1, execTransformStrict_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformStrict(&lc, sd::transform::CubeDerivative, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1698,8 +1698,8 @@ TEST_F(CudaBasicsTests1, execTransformSame_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformSame(&lc, sd::transform::Square, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), 
z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1728,8 +1728,8 @@ TEST_F(CudaBasicsTests1, execTransformSame_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformSame(&lc, sd::transform::Square, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1759,8 +1759,8 @@ TEST_F(CudaBasicsTests1, execTransformBool_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformBool(&lc, sd::transform::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1789,8 +1789,8 @@ TEST_F(CudaBasicsTests1, execTransformBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformBool(&lc, sd::transform::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1816,7 +1816,7 @@ TEST_F(CudaBasicsTests1, execReduceFloat_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + 
xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1838,9 +1838,9 @@ TEST_F(CudaBasicsTests1, execReduceFloat_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloat(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -1870,7 +1870,7 @@ TEST_F(CudaBasicsTests1, execReduceFloat_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1892,9 +1892,9 @@ TEST_F(CudaBasicsTests1, execReduceFloat_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloat(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -1925,7 +1925,7 @@ TEST_F(CudaBasicsTests1, execReduceSame_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1947,9 +1947,9 @@ TEST_F(CudaBasicsTests1, execReduceSame_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSame(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), 
x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -1979,7 +1979,7 @@ TEST_F(CudaBasicsTests1, execReduceSame_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2001,9 +2001,9 @@ TEST_F(CudaBasicsTests1, execReduceSame_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSame(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2035,7 +2035,7 @@ TEST_F(CudaBasicsTests1, execReduceBool_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2057,9 +2057,9 @@ TEST_F(CudaBasicsTests1, execReduceBool_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBool(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), 
(Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2089,7 +2089,7 @@ TEST_F(CudaBasicsTests1, execReduceBool_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2111,9 +2111,9 @@ TEST_F(CudaBasicsTests1, execReduceBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBool(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2144,7 +2144,7 @@ TEST_F(CudaBasicsTests1, execReduceLong_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2166,9 +2166,9 @@ TEST_F(CudaBasicsTests1, execReduceLong_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLong(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2198,7 +2198,7 @@ TEST_F(CudaBasicsTests1, execReduceLong_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); 
xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2220,9 +2220,9 @@ TEST_F(CudaBasicsTests1, execReduceLong_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLong(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2263,9 +2263,9 @@ TEST_F(CudaBasicsTests1, execReduceFloatScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloatScalar(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2299,9 +2299,9 @@ TEST_F(CudaBasicsTests1, execReduceFloatScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloatScalar(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2336,9 +2336,9 @@ TEST_F(CudaBasicsTests1, execReduceSameScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSameScalar(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, 
x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2372,9 +2372,9 @@ TEST_F(CudaBasicsTests1, execReduceSameScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSameScalar(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2410,9 +2410,9 @@ TEST_F(CudaBasicsTests1, execReduceBoolScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBoolScalar(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2446,9 +2446,9 @@ TEST_F(CudaBasicsTests1, execReduceBoolScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBoolScalar(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2484,9 +2484,9 
@@ TEST_F(CudaBasicsTests1, execReduceLongScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLongScalar(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2520,9 +2520,9 @@ TEST_F(CudaBasicsTests1, execReduceLongScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLongScalar(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2552,10 +2552,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_1) { PointersManager pm(context, "execReduce3TAD_1"); // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(context, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, dimensions.size(), packX.specialShapeInfo(), packX.specialOffsets(), nullptr, nullptr); pm.synchronize(); @@ -2580,7 +2580,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_2) { // evaluate xTad data shape::TAD xTad; - 
xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2603,10 +2603,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -2636,7 +2636,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2659,10 +2659,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_3) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2692,7 +2692,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_4) { // evaluate 
xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2714,10 +2714,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_4) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2753,9 +2753,9 @@ TEST_F(CudaBasicsTests1, execSummaryStats_1) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), true); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -2780,7 +2780,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2802,9 +2802,9 @@ TEST_F(CudaBasicsTests1, execSummaryStats_2) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStats(&lc, 
sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], true); @@ -2834,7 +2834,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2856,9 +2856,9 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], true); @@ -2895,9 +2895,9 @@ TEST_F(CudaBasicsTests1, execSummaryStatsScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStatsScalar(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), true); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -2944,9 +2944,9 @@ TEST_F(CudaBasicsTests1, execRandom_1) { // // call cuda kernel which calculates result // 
NativeOpExecutioner::execRandom(&lc, sd::random::GaussianDistribution, // &gen, -// nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), // extraArguments.argumentsAsT(z.dataType())); // // cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -2992,8 +2992,8 @@ TEST_F(CudaBasicsTests1, execRandom_2) { // call cuda kernel which calculates result NativeOpExecutioner::execRandom(lc, sd::random::DropOut, &gen, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extraArguments.argumentsAsT(z.dataType())); cudaResult = cudaStreamSynchronize(*lc->getCudaStream()); ASSERT_EQ(0, cudaResult); @@ -3036,7 +3036,7 @@ TEST_F(CudaBasicsTests1, execRandom_3) { // call cuda kernel which calculates result NativeOpExecutioner::execRandom(&lc, sd::random::UniformDistribution, &gen, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), devicePtrs[0]); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -3081,7 +3081,7 @@ TEST_F(CudaBasicsTests1, execRandom_4) { // call cuda kernel which calculates result NativeOpExecutioner::execRandom(context, sd::random::UniformDistribution, &gen, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extraArguments.argumentsAsT(z.dataType())); // cudaResult = 
cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp index 8a03d4abc..959362c4d 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp @@ -1718,17 +1718,17 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; -// inputBuffers[0] = (Nd4jPointer) x->getBuffer(); -// inputBuffers[1] = (Nd4jPointer) y->getBuffer(); +// inputBuffers[0] = (Nd4jPointer) x->buffer(); +// inputBuffers[1] = (Nd4jPointer) y->buffer(); -// inputShapes[0] = (Nd4jPointer) x->getShapeInfo(); -// inputShapes[1] = (Nd4jPointer) y->getShapeInfo(); +// inputShapes[0] = (Nd4jPointer) x->shapeInfo(); +// inputShapes[1] = (Nd4jPointer) y->shapeInfo(); // auto outputBuffers = new Nd4jPointer[1]; // auto outputShapes = new Nd4jPointer[1]; -// outputBuffers[0] = (Nd4jPointer) z->getBuffer(); -// outputShapes[0] = (Nd4jPointer) z->getShapeInfo(); +// outputBuffers[0] = (Nd4jPointer) z->buffer(); +// outputShapes[0] = (Nd4jPointer) z->shapeInfo(); // //auto status = execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, false); @@ -1768,11 +1768,11 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; -// inputBuffers[0] = (Nd4jPointer) x->getBuffer(); -// inputBuffers[1] = (Nd4jPointer) y->getBuffer(); +// inputBuffers[0] = (Nd4jPointer) x->buffer(); +// inputBuffers[1] = (Nd4jPointer) y->buffer(); -// inputShapes[0] = (Nd4jPointer) x->getShapeInfo(); -// inputShapes[1] = (Nd4jPointer) y->getShapeInfo(); +// inputShapes[0] = (Nd4jPointer) x->shapeInfo(); +// inputShapes[1] = (Nd4jPointer) y->shapeInfo(); // auto outputBuffers = new Nd4jPointer[1]; // auto outputShapes = new 
Nd4jPointer[1]; @@ -1811,9 +1811,9 @@ TEST_F(DeclarableOpsTests1, TestGemv1) { auto z = NDArrayFactory::create_('f', {5, 1}); auto expBuffer = new float[5]{28.00f,64.00f,100.00f,136.00f,172.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); - sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->getBuffer(), y->rows(), y->getBuffer(), 1, 0.0, z->getBuffer(), 1); + sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->buffer(), y->rows(), y->buffer(), 1, 0.0, z->buffer(), 1); ASSERT_TRUE(z->equalsTo(exp)); @@ -1930,8 +1930,8 @@ TEST_F(DeclarableOpsTests1, TestReductionShape1) { sd::ops::testreduction testop; - auto inP = new Nd4jLong[shape::shapeInfoLength(input->getShapeInfo())]; - memcpy(inP, input->getShapeInfo(), shape::shapeInfoByteLength(input->rankOf())); + auto inP = new Nd4jLong[shape::shapeInfoLength(input->shapeInfo())]; + memcpy(inP, input->shapeInfo(), shape::shapeInfoByteLength(input->rankOf())); auto inshape = new ShapeList(inP); @@ -1969,7 +1969,7 @@ TEST_F(DeclarableOpsTests1, TestReductionShape2) { sd::ops::testreduction testop; - auto inshapes = new ShapeList(input->getShapeInfo()); + auto inshapes = new ShapeList(input->shapeInfo()); auto shapes = testop.calculateOutputShape(inshapes, *block); ASSERT_EQ(1, shapes->size()); ASSERT_EQ(1, shapes->at(0)[0]); @@ -1994,14 +1994,14 @@ TEST_F(DeclarableOpsTests1, TestCustomShape1) { sd::ops::testcustom test; - auto inshapes = new ShapeList(input->getShapeInfo()); + auto inshapes = new ShapeList(input->shapeInfo()); auto shapes = test.calculateOutputShape(inshapes, *block); - ASSERT_EQ(input->getShapeInfo()[0], shapes->at(0)[0]); - ASSERT_EQ(input->getShapeInfo()[1] * 2, shapes->at(0)[1]); - ASSERT_EQ(input->getShapeInfo()[2] * 2, shapes->at(0)[2]); - ASSERT_EQ(input->getShapeInfo()[3] * 2, shapes->at(0)[3]); + ASSERT_EQ(input->shapeInfo()[0], shapes->at(0)[0]); + ASSERT_EQ(input->shapeInfo()[1] * 2, shapes->at(0)[1]); + 
ASSERT_EQ(input->shapeInfo()[2] * 2, shapes->at(0)[2]); + ASSERT_EQ(input->shapeInfo()[3] * 2, shapes->at(0)[3]); delete variableSpace; delete block; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index b8c89322c..963884c06 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -3677,7 +3677,7 @@ TEST_F(DeclarableOpsTests11, SafeDivideMixed_Test1) { NDArray labels('c', {2, 3}, {1.0, 2.0, 3.0, -1.0, 2.0, 1.0}); auto sumDiff = labels.reduceAlongDimension(reduce::Sum, {1}, true); - NDArray numOfNonZero(sumDiff.getShapeInfo(), sd::DataType::INT64, false); + NDArray numOfNonZero(sumDiff.shapeInfo(), sd::DataType::INT64, false); numOfNonZero.assign(1); sumDiff.applyPairwiseTransform(pairwise::SafeDivide, numOfNonZero, sumDiff); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index 0684f7887..2bca43ae9 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp @@ -779,8 +779,8 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -789,8 +789,8 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { #endif OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer zBuf(z.dataBuffer()); - pullRows(nativeStart, &xBuf, x.getShapeInfo(), x.getSpecialShapeInfo(), - &zBuf, z.getShapeInfo(), z.specialShapeInfo(), + pullRows(nativeStart, &xBuf, x.shapeInfo(), 
x.specialShapeInfo(), + &zBuf, z.shapeInfo(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), zTadPack.platformShapeInfo(), zTadPack.platformOffsets()); @@ -815,8 +815,8 @@ TEST_F(DeclarableOpsTests12, pullRows_2) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; #ifdef __CUDABLAS__ @@ -824,8 +824,8 @@ TEST_F(DeclarableOpsTests12, pullRows_2) { #endif OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer zBuf(z.dataBuffer()); - pullRows(nativeStart, &xBuf, x.getShapeInfo(), x.specialShapeInfo(), - &zBuf, z.getShapeInfo(), z.specialShapeInfo(), + pullRows(nativeStart, &xBuf, x.shapeInfo(), x.specialShapeInfo(), + &zBuf, z.shapeInfo(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), zTadPack.platformShapeInfo(), zTadPack.platformOffsets()); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index 4052e260d..c37f3fe4a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -3211,8 +3211,8 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test9) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.getBuffer(), 
input.getShapeInfo(),input.getSpecialBuffer(), input.getSpecialShapeInfo(),nullptr,variance.getBuffer(), variance.getShapeInfo(),variance.getSpecialBuffer(), variance.getSpecialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -3262,8 +3262,8 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test10) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.getBuffer(), input.getShapeInfo(),input.getSpecialBuffer(), input.getSpecialShapeInfo(),nullptr,variance.getBuffer(), variance.getShapeInfo(),variance.getSpecialBuffer(), variance.getSpecialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); 
NDArray::registerSpecialUse({&variance}, {&input}); @@ -3325,8 +3325,8 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test11) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions, true); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.getBuffer(), input.getShapeInfo(),input.getSpecialBuffer(), input.getSpecialShapeInfo(),nullptr,variance.getBuffer(), variance.getShapeInfo(),variance.getSpecialBuffer(), variance.getSpecialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp index 002e3376f..450b32bcc 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp @@ -156,7 +156,7 @@ TEST_F(DeclarableOpsTests6, Test_StridedSlice_Once_Again_04) { block->getIArguments()->push_back(1); block->getIArguments()->push_back(0); block->getIArguments()->push_back(0); - auto inputShapes = new ShapeList({ones->getShapeInfo(), b->getShapeInfo(), e->getShapeInfo(), s->getShapeInfo()}); + auto inputShapes = new ShapeList({ones->shapeInfo(), b->shapeInfo(), 
e->shapeInfo(), s->shapeInfo()}); sd::ops::strided_slice op; auto result = op.calculateOutputShape(inputShapes, *block); //execute({ones, &b, &e, &s}, {}, {0, 1, 0, 0, 0}); ASSERT_EQ(result->size(), 1); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp index c7e704a21..556ce3bb6 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp @@ -114,7 +114,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test1) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistributionInv_test1: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.shapeInfo(), extraParams); const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -145,7 +145,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test2) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistributionInv_test2: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.getShapeInfo(), x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.shapeInfo(), x.getBuffer(), x.shapeInfo(), extraParams); const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -174,7 +174,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test1) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistribution_test1: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), 
x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.shapeInfo(), extraParams); const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -207,7 +207,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test2) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistribution_test2: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.getShapeInfo(), x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.shapeInfo(), x.getBuffer(), x.shapeInfo(), extraParams); destroyRandom((Nd4jPointer) rng); #endif @@ -539,7 +539,7 @@ TEST_F(DeclarableOpsTests9, concat_test14) { auto z = result.at(0); - Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->getShapeInfo(), {0}); + Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->shapeInfo(), {0}); ASSERT_TRUE(2 == numOfTads); for (int e = 0; e < numOfTads; ++e) { @@ -601,7 +601,7 @@ TEST_F(DeclarableOpsTests9, concat_test17) { // z->printShapeInfo(); // z->printIndexedBuffer(); - Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->getShapeInfo(), {0}); + Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->shapeInfo(), {0}); ASSERT_TRUE(2 == numOfTads); for (int e = 0; e < numOfTads; ++e) { @@ -680,7 +680,7 @@ TEST_F(DeclarableOpsTests9, concat_test20) { auto z = result.at(0); - Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->getShapeInfo(), {0}); + Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->shapeInfo(), {0}); ASSERT_TRUE(4 == numOfTads); for (int e = 0; e < numOfTads; e++) { diff --git a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp index e6aeb43d4..81040185d 100644 --- a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp 
@@ -216,7 +216,7 @@ TEST_F(EmptyTests, test_shaped_empty_3) { } TEST_F(EmptyTests, test_shaped_empty_4) { - auto shape = ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::FLOAT32); + const auto shape = ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::FLOAT32); NDArray array(shape, true, sd::LaunchContext::defaultContext()); std::vector shapeOf({0}); diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp index 679f0c5eb..e25bd0144 100644 --- a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp @@ -1440,7 +1440,7 @@ TEST_F(HelpersTests1, SVD_test17) { // auto outArr = NDArrayFactory::create('c', {2,5}); // // -// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.getShapeInfo(), outArr.getBuffer(), outArr.getShapeInfo()); +// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.shapeInfo(), outArr.getBuffer(), outArr.shapeInfo()); // // ASSERT_TRUE(outArr.equalsTo(&exp)); // ASSERT_TRUE(outArr.isSameShapeStrict(exp)); @@ -1454,7 +1454,7 @@ TEST_F(HelpersTests1, SVD_test17) { // auto exp = NDArrayFactory::create('c', {2,5}, {10,9,8,7,6,5,4,3,2,1}); // // -// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.getShapeInfo(), inArr.getBuffer(), inArr.getShapeInfo()); +// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.shapeInfo(), inArr.getBuffer(), inArr.shapeInfo()); // // ASSERT_TRUE(inArr.equalsTo(&exp)); // ASSERT_TRUE(inArr.isSameShapeStrict(exp)); @@ -1468,7 +1468,7 @@ TEST_F(HelpersTests1, SVD_test17) { // auto exp = NDArrayFactory::create('c', {2,5}, {5,4,3,2,1,6,7,8,9,10}); // auto outArr = NDArrayFactory::create('c', {2,5}); // -// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.getShapeInfo(), outArr.getBuffer(), 
outArr.getShapeInfo(), 5); +// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.shapeInfo(), outArr.getBuffer(), outArr.shapeInfo(), 5); // // ASSERT_TRUE(outArr.equalsTo(&exp)); // ASSERT_TRUE(outArr.isSameShapeStrict(exp)); diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index 29c681544..e6992d7a2 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -46,7 +46,7 @@ TEST_F(JavaInteropTests, TestShapeExposure1) { std::vector tArgs({}); std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); - Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; + Nd4jPointer ptrs[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weights.shapeInfo()}; auto shapeList = calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 2, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); @@ -76,7 +76,7 @@ TEST_F(JavaInteropTests, TestShapeExposure2) { std::vector iArgs({}); - Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo()}; + Nd4jPointer ptrs[] = {(Nd4jPointer) input.shapeInfo()}; auto shapeList = calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 1, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); @@ -104,8 +104,8 @@ TEST_F(JavaInteropTests, TestShapeExposure3) { sub1.assign(1.0f); sub2.assign(2.0f); - Nd4jPointer inputBuffers[] = {x.buffer(), sizes.buffer(), x.getSpecialBuffer(), sizes.getSpecialBuffer()}; - Nd4jPointer inputShapes[] = {x.shapeInfo(), sizes.shapeInfo(), x.getSpecialShapeInfo(), sizes.getSpecialShapeInfo()}; + Nd4jPointer inputBuffers[] = {x.buffer(), sizes.buffer(), x.specialBuffer(), sizes.specialBuffer()}; + Nd4jPointer inputShapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)sizes.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)sizes.specialShapeInfo()}; sd::ops::split_v op; @@ -130,11 +130,11 @@ 
TEST_F(JavaInteropTests, Test_Squeeze_1) { sd::ops::squeeze op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); @@ -151,12 +151,12 @@ TEST_F(JavaInteropTests, Test_RDiv_1) { sd::ops::reversedivide op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), (Nd4jPointer)z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto status = 
execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); NDArray::registerSpecialUse({&z}, {&x, &y}); @@ -186,12 +186,12 @@ TEST_F(JavaInteropTests, TestSconv2d_1) { NDArray::prepareSpecialUse({&output}, {&input, &weightsD, &weightsP, &bias}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), (Nd4jPointer) weightsD.getBuffer(), (Nd4jPointer) weightsP.getBuffer(), (Nd4jPointer) bias.getBuffer(), (Nd4jPointer) input.getSpecialBuffer(), (Nd4jPointer) weightsD.getSpecialBuffer(), (Nd4jPointer) weightsP.getSpecialBuffer(), (Nd4jPointer) bias.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weightsD.getShapeInfo(), (Nd4jPointer) weightsP.getShapeInfo(), (Nd4jPointer) bias.getShapeInfo(), (Nd4jPointer) input.getSpecialShapeInfo(), (Nd4jPointer) weightsD.getSpecialShapeInfo(), (Nd4jPointer) weightsP.getSpecialShapeInfo(), (Nd4jPointer) bias.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) weightsD.buffer(), (Nd4jPointer) weightsP.buffer(), (Nd4jPointer) bias.buffer(), (Nd4jPointer) input.specialBuffer(), (Nd4jPointer) weightsD.specialBuffer(), (Nd4jPointer) weightsP.specialBuffer(), (Nd4jPointer) bias.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weightsD.shapeInfo(), (Nd4jPointer) weightsP.shapeInfo(), (Nd4jPointer) bias.shapeInfo(), (Nd4jPointer) input.specialShapeInfo(), (Nd4jPointer) weightsD.specialShapeInfo(), (Nd4jPointer) weightsP.specialShapeInfo(), (Nd4jPointer) bias.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), (Nd4jPointer) output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), (Nd4jPointer) output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), (Nd4jPointer) 
output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer) output.specialShapeInfo()}; Nd4jLong exp[] = {1, 1, 1, 1, 0, 0, 1, 1, 0, 0}; @@ -221,12 +221,12 @@ TEST_F(JavaInteropTests, TestSconv2d_2) { NDArray::prepareSpecialUse({&output}, {&input, &weightsD}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), (Nd4jPointer) weightsD.getBuffer(), input.getSpecialBuffer(), weightsD.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weightsD.getShapeInfo(), input.getSpecialShapeInfo(), weightsD.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) weightsD.buffer(), input.specialBuffer(), weightsD.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weightsD.shapeInfo(), (Nd4jPointer)input.specialShapeInfo(), (Nd4jPointer)weightsD.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; Nd4jLong exp[] = {1, 1, 1, 1, 0, 0, 1, 1, 0}; @@ -245,11 +245,11 @@ TEST_F(JavaInteropTests, TestMaxPooling2d_1) { NDArray::prepareSpecialUse({&output}, {&input}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - 
Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); @@ -276,11 +276,11 @@ TEST_F(JavaInteropTests, TestCol2Im_1) { NDArray::prepareSpecialUse({&output}, {&input}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; sd::ops::col2im op; @@ -316,11 +316,11 @@ TEST_F(JavaInteropTests, TestPNorm_1) { Nd4jLong exp[] = {2, 2, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0}; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) 
output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); @@ -342,8 +342,8 @@ TEST_F(JavaInteropTests, TestInplace_1) { double extras[] = {-1.0f, 1.0f}; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; Nd4jStatus result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, nullptr, nullptr, 0, extras, 2, nullptr, 0, nullptr, 0, true); @@ -482,11 +482,11 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { Nd4jLong exp[] = {3,3, 1,1, 0,0, 1,1, 1, 0, 1}; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), z.specialShapeInfo()}; auto result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); @@ -669,11 +669,11 @@ TEST_F(JavaInteropTests, Test_Greater_1) { sd::ops::greater op; - Nd4jPointer ptrsInBuffer[] = 
{(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer(), o.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo(), o.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.buffer(), o.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.shapeInfo(), (Nd4jPointer)o.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -693,11 +693,11 @@ TEST_F(JavaInteropTests, Test_Greater_2) { NDArray::prepareSpecialUse({&o}, {&x, &y}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer(), o.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo(), o.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.buffer(), o.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] 
= {(Nd4jPointer) o.shapeInfo(), (Nd4jPointer)o.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -716,11 +716,11 @@ TEST_F(JavaInteropTests, Test_Boolean_Op_1) { NDArray::prepareSpecialUse({&o}, {&x}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer(), o.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo(), o.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.buffer(), o.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.shapeInfo(), (Nd4jPointer)o.specialShapeInfo()}; auto hash = op.getOpHash(); auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -741,11 +741,11 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_1) { NDArray::prepareSpecialUse({&z}, {&x}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), 
(Nd4jPointer)z.specialShapeInfo()}; auto hash = op.getOpHash(); auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -769,11 +769,11 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_2) { NDArray::prepareSpecialUse({&z}, {&x, &y}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto hash = op.getOpHash(); auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -796,11 +796,11 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_3) { NDArray::prepareSpecialUse({&output}, {&input, &indices}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), (Nd4jPointer) indices.getBuffer(), input.getSpecialBuffer(), indices.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) indices.getShapeInfo(), input.getSpecialShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) indices.buffer(), 
input.specialBuffer(), indices.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) indices.shapeInfo(), (Nd4jPointer)input.specialShapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; Nd4jLong iArgs[] = {1}; @@ -830,8 +830,8 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { extraPointers = new Nd4jPointer[6] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {0,1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {0,1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {0,1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {0,1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dims}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -877,11 +877,11 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_double) { NDArray::prepareSpecialUse({&z}, {&input}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(input.shapeInfo()), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = 
{reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; Nd4jLong iArgs[] = {3,3, 3,3, 0,0, 1,1,1, 0,1}; @@ -903,11 +903,11 @@ TEST_F(JavaInteropTests, Test_MaxPool2D_float_1) { NDArray::prepareSpecialUse({&z}, {&input}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(input.shapeInfo()), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; Nd4jLong iArgs[] = {2,2, 1,1, 1,1, 2,2,1, 0,0}; @@ -931,11 +931,14 @@ TEST_F(JavaInteropTests, Test_Unstack_1) { NDArray::prepareSpecialUse({&z0, &z1, &z2, &z3, &z4}, {&x}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(x.shapeInfo()), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {z0.buffer(), z1.buffer(), z2.buffer(), z3.buffer(), z4.buffer(), z0.getSpecialBuffer(), z1.getSpecialBuffer(), z2.getSpecialBuffer(), z3.getSpecialBuffer(), z4.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {z0.shapeInfo(), z1.shapeInfo(), z2.shapeInfo(), z3.shapeInfo(), 
z4.shapeInfo(), z0.getSpecialShapeInfo(), z1.getSpecialShapeInfo(), z2.getSpecialShapeInfo(), z3.getSpecialShapeInfo(), z4.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {z0.buffer(), z1.buffer(), z2.buffer(), z3.buffer(), z4.buffer(), z0.specialBuffer(), z1.specialBuffer(), z2.specialBuffer(), z3.specialBuffer(), z4.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z0.shapeInfo(), (Nd4jPointer)z1.shapeInfo(), (Nd4jPointer)z2.shapeInfo(), + (Nd4jPointer)z3.shapeInfo(), (Nd4jPointer)z4.shapeInfo(), (Nd4jPointer)z0.specialShapeInfo(), + (Nd4jPointer)z1.specialShapeInfo(), (Nd4jPointer)z2.specialShapeInfo(), + (Nd4jPointer)z3.specialShapeInfo(), (Nd4jPointer)z4.specialShapeInfo()}; Nd4jLong iArgs[] = {0}; @@ -958,11 +961,11 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_float) { NDArray::prepareSpecialUse({&z}, {&input}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(input.shapeInfo()), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; Nd4jLong iArgs[] = {3,3, 3,3, 0,0, 1,1,1, 0,1}; auto hash = op.getOpHash(); @@ -991,9 +994,9 @@ TEST_F(JavaInteropTests, Test_Mixed_Add_1) { OpaqueDataBuffer zBuf(arrayZ.dataBuffer()); execPairwiseTransform(nullptr, pairwise::Add, - &xBuf, arrayX.shapeInfo(), arrayX.getSpecialShapeInfo(), - &yBuf, arrayY.shapeInfo(), arrayY.getSpecialShapeInfo(), - &zBuf, arrayZ.shapeInfo(), 
arrayZ.getSpecialShapeInfo(), + &xBuf, arrayX.shapeInfo(), arrayX.specialShapeInfo(), + &yBuf, arrayY.shapeInfo(), arrayY.specialShapeInfo(), + &zBuf, arrayZ.shapeInfo(), arrayZ.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arrayZ}, {&arrayX, &arrayY}); @@ -1010,11 +1013,11 @@ TEST_F(JavaInteropTests, Test_Add_1) { sd::ops::add op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo(),}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1035,11 +1038,11 @@ TEST_F(JavaInteropTests, zeta_test10) { NDArray::prepareSpecialUse({&z}, {&x, &q}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), q.getBuffer(), x.getSpecialBuffer(), q.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), q.getShapeInfo(), x.specialShapeInfo(), q.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), q.buffer(), x.specialBuffer(), q.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)q.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), 
(Nd4jPointer)q.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1060,8 +1063,8 @@ TEST_F(JavaInteropTests, Test_Boolean_Broadcastables_1) { auto arrayX = NDArrayFactory::create('c', {10, 10}); auto arrayY = NDArrayFactory::create('c', {10, 10}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(arrayX.buffer()), reinterpret_cast(arrayY.buffer()), arrayX.getSpecialBuffer(), arrayY.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(arrayX.shapeInfo()), reinterpret_cast(arrayY.shapeInfo()), arrayX.getSpecialShapeInfo(), arrayY.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(arrayX.buffer()), reinterpret_cast(arrayY.buffer()), arrayX.specialBuffer(), arrayY.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)arrayX.shapeInfo(), (Nd4jPointer)arrayY.shapeInfo(), (Nd4jPointer)arrayX.specialShapeInfo(), (Nd4jPointer)arrayY.specialShapeInfo()}; NDArray::prepareSpecialUse({}, {&arrayX, &arrayY}); sd::ops::greater_equal op; @@ -1077,11 +1080,11 @@ TEST_F(JavaInteropTests, Test_L2_Loss_3) { NDArray::prepareSpecialUse({&z}, {&x}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(x.shapeInfo()), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffer[] = {reinterpret_cast(z.buffer()), 
z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffer[] = {reinterpret_cast(z.buffer()), (Nd4jPointer)z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; sd::ops::l2_loss op; auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffer, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1102,9 +1105,9 @@ TEST_F(JavaInteropTests, Test_Fastpath_3) { NDArray::prepareSpecialUse({&z}, {&array0, &array1}); - ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.getSpecialBuffer(), array0.getSpecialShapeInfo()); - ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.getSpecialBuffer(), array1.getSpecialShapeInfo()); - ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()); + ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.specialBuffer(), array0.specialShapeInfo()); + ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.specialBuffer(), array1.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); ASSERT_EQ(2, ctx.width()); diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp index dae5ba5b9..7c7734b38 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp @@ -466,8 +466,8 @@ TEST_F(LegacyOpsTests, Reduce3_2) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -506,8 +506,8 @@ TEST_F(LegacyOpsTests, Reduce3_3) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -546,8 +546,8 @@ TEST_F(LegacyOpsTests, Reduce3_4) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -588,8 +588,8 @@ TEST_F(LegacyOpsTests, Reduce3_5) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, 
context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); @@ -707,7 +707,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_1) { x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - &dim, 1, x.getPlatformShapeInfo(), nullptr); + &dim, 1, x.platformShapeInfo(), nullptr); ASSERT_EQ(e, z); } @@ -720,7 +720,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_2) { int dim = 1; - NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Min, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.getPlatformShapeInfo(), nullptr); + NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Min, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.platformShapeInfo(), nullptr); ASSERT_EQ(e, z); } @@ -733,7 +733,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_3) { int dim = 1; - NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Max, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.getPlatformShapeInfo(), nullptr); + NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Max, x.buffer(), 
x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.platformShapeInfo(), nullptr); ASSERT_EQ(e, z); } diff --git a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp index db342771e..803029216 100644 --- a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp @@ -1962,7 +1962,7 @@ TEST_F(MultiDataTypeTests, aaa) { NativeOpExecutioner::execRandom(LaunchContext::defaultContext(), sd::random::UniformDistribution, &gen, - z.buffer(), z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extras.argumentsAsT()); // z.printIndexedBuffer(); diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu b/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu index 48208d2ff..24ac087d1 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu @@ -198,9 +198,9 @@ TEST_F(NDArrayConstructorsTests, test_constructor_10) { ASSERT_TRUE(scalar2.isActualOnDeviceSide()); ASSERT_TRUE(scalar2.isActualOnHostSide()); - ASSERT_TRUE(scalar1.getBuffer() == nullptr); - ASSERT_TRUE(scalar1.getSpecialBuffer() != nullptr); - ASSERT_TRUE(scalar1.getShapeInfo() != nullptr); - ASSERT_TRUE(scalar1.getSpecialShapeInfo() != nullptr); + ASSERT_TRUE(scalar1.buffer() == nullptr); + ASSERT_TRUE(scalar1.specialBuffer() != nullptr); + ASSERT_TRUE(scalar1.shapeInfo() != nullptr); + ASSERT_TRUE(scalar1.specialShapeInfo() != nullptr); ASSERT_TRUE(scalar1.lengthOf() == 1); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu index 6c37e3145..f95705f08 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu +++ 
b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu @@ -700,7 +700,7 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -722,9 +722,9 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(&lc, sd::broadcast::Multiply, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -760,7 +760,7 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -788,9 +788,9 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_3) { NDArray::registerSpecialUse({&z}, {&x, &y}); // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(pLc, sd::broadcast::Multiply, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), 
z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -958,7 +958,7 @@ TEST_F(NDArrayCudaBasicsTests, TestBroadcastRaw_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -984,9 +984,9 @@ TEST_F(NDArrayCudaBasicsTests, TestBroadcastRaw_1) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(pLc, sd::broadcast::Add, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1949,7 +1949,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(xExp.isSameShape(x0)); ASSERT_TRUE(xExp.equalsTo(x0)); // for(int i = 0; i < shape::shapeInfoLength(x0.rankOf()); ++i) -// ASSERT_TRUE(x0.getShapeInfo()[i] == shapeExpX0[i]); +// ASSERT_TRUE(x0.shapeInfo()[i] == shapeExpX0[i]); // for(int i = 0; i < x0.lengthOf(); ++i) // ASSERT_TRUE(x0.e(i) == buffExpX0[i]); @@ -1959,7 +1959,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x1Exp.equalsTo(x1)); // for(int i = 0; i < shape::shapeInfoLength(x1.rankOf()); ++i) -// ASSERT_TRUE(x1.getShapeInfo()[i] == shapeExpX1[i]); +// ASSERT_TRUE(x1.shapeInfo()[i] == shapeExpX1[i]); // for(int i = 0; i < x1.lengthOf(); ++i) // ASSERT_TRUE(x1.e(i) == buffExpX1[i]); @@ -1970,7 +1970,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) // x2Exp.printBuffer("X2 EXPECT"); 
ASSERT_TRUE(x2Exp.equalsTo(x2)); // for(int i = 0; i < shape::shapeInfoLength(x2.rankOf()); ++i) -// ASSERT_TRUE(x2.getShapeInfo()[i] == shapeExpX2[i]); +// ASSERT_TRUE(x2.shapeInfo()[i] == shapeExpX2[i]); // for(int i = 0; i < x2.lengthOf(); ++i) // ASSERT_TRUE(x2.e(i) == buffExpX2[i]); @@ -1979,7 +1979,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x3Exp.isSameShape(x3)); ASSERT_TRUE(x3Exp.equalsTo(x3)); // for(int i = 0; i < shape::shapeInfoLength(x3.rankOf()); ++i) -// ASSERT_TRUE(x3.getShapeInfo()[i] == shapeExpX3[i]); +// ASSERT_TRUE(x3.shapeInfo()[i] == shapeExpX3[i]); // for(int i = 0; i < x3.lengthOf(); ++i) // ASSERT_TRUE(x3.e(i) == buffExpX3[i]); @@ -1988,7 +1988,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x4Exp.isSameShape(x4)); ASSERT_TRUE(x4Exp.equalsTo(x4)); // for(int i = 0; i < shape::shapeInfoLength(x4.rankOf()); ++i) -// ASSERT_TRUE(x4.getShapeInfo()[i] == shapeExpX4[i]); +// ASSERT_TRUE(x4.shapeInfo()[i] == shapeExpX4[i]); // for(int i = 0; i < x4.lengthOf(); ++i) // ASSERT_TRUE(x4.e(i) == buffExpX4[i]); @@ -1998,7 +1998,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x5Exp.equalsTo(x5)); // for(int i = 0; i < shape::shapeInfoLength(x5.rankOf()); ++i) -// ASSERT_TRUE(x5.getShapeInfo()[i] == shapeExpX5[i]); +// ASSERT_TRUE(x5.shapeInfo()[i] == shapeExpX5[i]); // for(int i = 0; i < x5.lengthOf(); ++i) // ASSERT_TRUE(x5.e(i) == buffExpX5[i]); @@ -2008,7 +2008,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y0Exp.isSameShape(y0)); ASSERT_TRUE(y0Exp.equalsTo(y0)); // for(int i = 0; i < shape::shapeInfoLength(y0.rankOf()); ++i) -// ASSERT_TRUE(y0.getShapeInfo()[i] == shapeExpY0[i]); +// ASSERT_TRUE(y0.shapeInfo()[i] == shapeExpY0[i]); // for(int i = 0; i < y0.lengthOf(); ++i) // ASSERT_TRUE(y0.e(i) == buffExpY0[i]); @@ -2017,7 +2017,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y1Exp.isSameShape(y1)); ASSERT_TRUE(y1Exp.equalsTo(y1)); // for(int i = 0; i < 
shape::shapeInfoLength(y1.rankOf()); ++i) -// ASSERT_TRUE(y1.getShapeInfo()[i] == shapeExpY1[i]); +// ASSERT_TRUE(y1.shapeInfo()[i] == shapeExpY1[i]); // for(int i = 0; i < y1.lengthOf(); ++i) // ASSERT_TRUE(y1.e(i) == buffExpY1[i]); @@ -2026,7 +2026,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y2Exp.isSameShape(y2)); ASSERT_TRUE(y2Exp.equalsTo(y2)); // for(int i = 0; i < shape::shapeInfoLength(y2.rankOf()); ++i) -// ASSERT_TRUE(y2.getShapeInfo()[i] == shapeExpY2[i]); +// ASSERT_TRUE(y2.shapeInfo()[i] == shapeExpY2[i]); // for(int i = 0; i < y2.lengthOf(); ++i) // ASSERT_TRUE(y2.e(i) == buffExpY2[i]); @@ -2035,7 +2035,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y3Exp.isSameShape(y3)); ASSERT_TRUE(y3Exp.equalsTo(y3)); // for(int i = 0; i < shape::shapeInfoLength(y3.rankOf()); ++i) -// ASSERT_TRUE(y3.getShapeInfo()[i] == shapeExpY3[i]); +// ASSERT_TRUE(y3.shapeInfo()[i] == shapeExpY3[i]); // for(int i = 0; i < y3.lengthOf(); ++i) // ASSERT_TRUE(y3.e(i) == buffExpY3[i]); @@ -2044,7 +2044,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y4Exp.isSameShape(y4)); ASSERT_TRUE(y4Exp.equalsTo(y4)); // for(int i = 0; i < shape::shapeInfoLength(y4.rankOf()); ++i) -// ASSERT_TRUE(y4.getShapeInfo()[i] == shapeExpY4[i]); +// ASSERT_TRUE(y4.shapeInfo()[i] == shapeExpY4[i]); // for(int i = 0; i < y4.lengthOf(); ++i) // ASSERT_TRUE(y4.e(i) == buffExpY4[i]); @@ -2053,7 +2053,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y5Exp.isSameShape(y5)); ASSERT_TRUE(y5Exp.equalsTo(y5)); // for(int i = 0; i < shape::shapeInfoLength(y5.rankOf()); ++i) -// ASSERT_TRUE(y5.getShapeInfo()[i] == shapeExpY5[i]); +// ASSERT_TRUE(y5.shapeInfo()[i] == shapeExpY5[i]); // for(int i = 0; i < y5.lengthOf(); ++i) // ASSERT_TRUE(y5.e(i) == buffExpY5[i]); @@ -2077,9 +2077,9 @@ TEST_F(NDArrayCudaBasicsTests, Test_diagonal_1) { NDArray tmp(sd::DataType::FLOAT32, x.getContext()); // scalar = 0 ExtraArguments extras({eps}); - 
NativeOpExecutioner::execReduce3Scalar(diag.getContext(), reduce3::EqualsWithEps, diag.getBuffer(), - diag.getShapeInfo(), diag.getSpecialBuffer(), diag.getSpecialShapeInfo(), extras.argumentsAsT(sd::DataType::FLOAT32), - exp.getBuffer(), exp.getShapeInfo(), exp.getSpecialBuffer(), exp.getSpecialShapeInfo(), + NativeOpExecutioner::execReduce3Scalar(diag.getContext(), reduce3::EqualsWithEps, diag.buffer(), + diag.shapeInfo(), diag.specialBuffer(), diag.specialShapeInfo(), extras.argumentsAsT(sd::DataType::FLOAT32), + exp.buffer(), exp.shapeInfo(), exp.specialBuffer(), exp.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo()); cudaStream_t* stream = x.getContext()->getCudaStream(); auto res = cudaStreamSynchronize(*stream); diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp index 5e6cd10fb..669574fa7 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp @@ -99,7 +99,7 @@ TEST_F(NDArrayTest, NDArrayOrder1) { } for (int i = 0; i < 8; i++) { - ASSERT_EQ(fShape[i], arrayF->getShapeInfo()[i]); + ASSERT_EQ(fShape[i], arrayF->shapeInfo()[i]); } for (int i = 0; i < 4; i++) { @@ -107,7 +107,7 @@ TEST_F(NDArrayTest, NDArrayOrder1) { } for (int i = 0; i < 8; i++) { - ASSERT_EQ(cShape[i], arrayC2->getShapeInfo()[i]); + ASSERT_EQ(cShape[i], arrayC2->shapeInfo()[i]); } @@ -237,13 +237,13 @@ TEST_F(NDArrayTest, TestPermuteReshape1) { array.permutei({1, 2, 3, 0}); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) - ASSERT_EQ(pShape[e], array.getShapeInfo()[e]); + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) + ASSERT_EQ(pShape[e], array.shapeInfo()[e]); array.reshapei('c', {2, 25, 2}); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) - ASSERT_EQ(rShape[e], array.getShapeInfo()[e]); + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) + 
ASSERT_EQ(rShape[e], array.shapeInfo()[e]); } @@ -259,15 +259,15 @@ TEST_F(NDArrayTest, TestPermuteReshape2) { // array.printShapeInfo("after "); - auto aShape = array.getShapeInfo(); + auto aShape = array.shapeInfo(); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) ASSERT_EQ(pShape[e], aShape[e]); array.reshapei('c', {2, 72, 25}); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) - ASSERT_EQ(rShape[e], array.getShapeInfo()[e]); + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) + ASSERT_EQ(rShape[e], array.shapeInfo()[e]); } ////////////////////////////////////////////////////////////////////// @@ -947,9 +947,9 @@ TEST_F(NDArrayTest, TestMmulHelper2) { auto z = NDArrayFactory::create_('f', {5, 1}); auto expBuffer = new float[5]{28.00f, 64.00f, 100.00f, 136.00f, 172.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo(), sd::LaunchContext ::defaultContext(), true); + auto exp = new NDArray(expBuffer, z->shapeInfo(), sd::LaunchContext ::defaultContext(), true); - //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->getBuffer(), y->rows(), y->getBuffer(), 1, 0.0, z->getBuffer(), 1); + //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->buffer(), y->rows(), y->buffer(), 1, 0.0, z->buffer(), 1); MmulHelper::mmul(x, y, z); @@ -976,9 +976,9 @@ TEST_F(NDArrayTest, TestMmulHelper3) { auto z = NDArrayFactory::create_('f', {5, 1}); auto expBuffer = new float[5]{92.00f, 104.00f, 116.00f, 128.00f, 140.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); - //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->getBuffer(), y->rows(), y->getBuffer(), 1, 0.0, z->getBuffer(), 1); + //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->buffer(), y->rows(), y->buffer(), 1, 0.0, z->buffer(), 1); MmulHelper::mmul(x, y, z); @@ -1011,7 +1011,7 @@ 
TEST_F(NDArrayTest, TestMmulHelper4) { auto z = NDArrayFactory::create_('f', {3, 3}); auto expBuffer = new float[9]{7.0f, 21.0f, 35.0f, 10.0f, 28.0f, 46.0f, 13.0f, 35.0f, 57.0f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(x, y, z); ASSERT_TRUE(z->equalsTo(exp)); @@ -1041,7 +1041,7 @@ TEST_F(NDArrayTest, TestMmulHelper5) { auto z = NDArrayFactory::create_('f', {3, 3}); auto expBuffer = new float[9]{7.0f, 14.0f, 21.0f, 12.0f, 21.0f, 30.0f, 17.0f, 28.0f, 39.0f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(x, y, z); ASSERT_TRUE(z->equalsTo(exp)); @@ -1071,7 +1071,7 @@ TEST_F(NDArrayTest, TestMmulHelper6) { auto z = NDArrayFactory::create_('f', {3, 3}); auto expBuffer = new float[9]{39.0f, 54.0f, 69.0f, 9.0f, 18.0f, 27.0f, 9.0f, 12.0f, 15.0f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(x, y, z); ASSERT_TRUE(z->equalsTo(exp)); @@ -1102,7 +1102,7 @@ TEST_F(NDArrayTest, TestMmulHelper7) { auto z = NDArrayFactory::create_('f', {1, 3}); auto expBuffer = new float[9]{110.00f, 260.00f, 410.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(y, x, z); @@ -1301,7 +1301,7 @@ TEST_F(NDArrayTest, TestIndexedPut2) { x.p(1, 1.0f); //x.printBuffer("after"); - ASSERT_NEAR(reinterpret_cast(x.getBuffer())[2], 1.0, 1e-5); + ASSERT_NEAR(reinterpret_cast(x.buffer())[2], 1.0, 1e-5); } TEST_F(NDArrayTest, TestIndexedPut3) { @@ -1309,7 +1309,7 @@ TEST_F(NDArrayTest, TestIndexedPut3) { x.p(1, 1.0f); //x.printBuffer("after"); - ASSERT_NEAR(reinterpret_cast(x.getBuffer())[1], 1.0, 1e-5); + ASSERT_NEAR(reinterpret_cast(x.buffer())[1], 1.0, 1e-5); } TEST_F(NDArrayTest, TestIndexedPut4) { @@ -1317,7 +1317,7 @@ TEST_F(NDArrayTest, TestIndexedPut4) { x.p(0, 1, 1.0f); 
//x.printBuffer("after"); - ASSERT_NEAR(reinterpret_cast(x.getBuffer())[2], 1.0, 1e-5); + ASSERT_NEAR(reinterpret_cast(x.buffer())[2], 1.0, 1e-5); } diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp index 49f003809..4dd4c3abe 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp @@ -974,74 +974,74 @@ TEST_F(NDArrayTest2, subarray_1) { NDArray x0 = x(0, {1,2}); for(int i = 0; i < shape::shapeInfoLength(x0.rankOf()); ++i) - ASSERT_TRUE(x0.getShapeInfo()[i] == shapeExpX0[i]); + ASSERT_TRUE(x0.shapeInfo()[i] == shapeExpX0[i]); for(int i = 0; i < x0.lengthOf(); ++i) ASSERT_TRUE(x0.e(i) == buffExpX0[i]); NDArray x1 = x(1, {1,2}); for(int i = 0; i < shape::shapeInfoLength(x1.rankOf()); ++i) - ASSERT_TRUE(x1.getShapeInfo()[i] == shapeExpX0[i]); + ASSERT_TRUE(x1.shapeInfo()[i] == shapeExpX0[i]); for(int i = 0; i < x1.lengthOf(); ++i) ASSERT_TRUE(x1.e(i) == buffExpX1[i]); NDArray x2 = x(0, {1,2}, true); for(int i = 0; i < shape::shapeInfoLength(x2.rankOf()); ++i) - ASSERT_TRUE(x2.getShapeInfo()[i] == shapeExpX2[i]); + ASSERT_TRUE(x2.shapeInfo()[i] == shapeExpX2[i]); for(int i = 0; i < x2.lengthOf(); ++i) ASSERT_TRUE(x2.e(i) == buffExpX2[i]); NDArray x3 = x(2, {1}); for(int i = 0; i < shape::shapeInfoLength(x3.rankOf()); ++i) - ASSERT_TRUE(x3.getShapeInfo()[i] == shapeExpX3[i]); + ASSERT_TRUE(x3.shapeInfo()[i] == shapeExpX3[i]); for(int i = 0; i < x3.lengthOf(); ++i) ASSERT_TRUE(x3.e(i) == buffExpX3[i]); NDArray x4 = x(2, {1}, true); for(int i = 0; i < shape::shapeInfoLength(x4.rankOf()); ++i) - ASSERT_TRUE(x4.getShapeInfo()[i] == shapeExpX4[i]); + ASSERT_TRUE(x4.shapeInfo()[i] == shapeExpX4[i]); for(int i = 0; i < x4.lengthOf(); ++i) ASSERT_TRUE(x4.e(i) == buffExpX4[i]); NDArray x5 = x(3, {2}); for(int i = 0; i < shape::shapeInfoLength(x5.rankOf()); ++i) - ASSERT_TRUE(x5.getShapeInfo()[i] == shapeExpX5[i]); + ASSERT_TRUE(x5.shapeInfo()[i] == 
shapeExpX5[i]); for(int i = 0; i < x5.lengthOf(); ++i) ASSERT_TRUE(x5.e(i) == buffExpX5[i]); // ******************* // NDArray y0 = y(0, {1,2}); for(int i = 0; i < shape::shapeInfoLength(y0.rankOf()); ++i) - ASSERT_TRUE(y0.getShapeInfo()[i] == shapeExpY0[i]); + ASSERT_TRUE(y0.shapeInfo()[i] == shapeExpY0[i]); for(int i = 0; i < y0.lengthOf(); ++i) ASSERT_TRUE(y0.e(i) == buffExpY0[i]); NDArray y1 = y(1, {1,2}); for(int i = 0; i < shape::shapeInfoLength(y1.rankOf()); ++i) - ASSERT_TRUE(y1.getShapeInfo()[i] == shapeExpY0[i]); + ASSERT_TRUE(y1.shapeInfo()[i] == shapeExpY0[i]); for(int i = 0; i < y1.lengthOf(); ++i) ASSERT_TRUE(y1.e(i) == buffExpY1[i]); NDArray y2 = y(0, {1,2}, true); for(int i = 0; i < shape::shapeInfoLength(y2.rankOf()); ++i) - ASSERT_TRUE(y2.getShapeInfo()[i] == shapeExpY2[i]); + ASSERT_TRUE(y2.shapeInfo()[i] == shapeExpY2[i]); for(int i = 0; i < y2.lengthOf(); ++i) ASSERT_TRUE(y2.e(i) == buffExpY2[i]); NDArray y3 = y(2, {1}); for(int i = 0; i < shape::shapeInfoLength(y3.rankOf()); ++i) - ASSERT_TRUE(y3.getShapeInfo()[i] == shapeExpY3[i]); + ASSERT_TRUE(y3.shapeInfo()[i] == shapeExpY3[i]); for(int i = 0; i < y3.lengthOf(); ++i) ASSERT_TRUE(y3.e(i) == buffExpY3[i]); NDArray y4 = y(2, {1}, true); for(int i = 0; i < shape::shapeInfoLength(y4.rankOf()); ++i) - ASSERT_TRUE(y4.getShapeInfo()[i] == shapeExpY4[i]); + ASSERT_TRUE(y4.shapeInfo()[i] == shapeExpY4[i]); for(int i = 0; i < y4.lengthOf(); ++i) ASSERT_TRUE(y4.e(i) == buffExpY4[i]); NDArray y5 = y(3, {2}); for(int i = 0; i < shape::shapeInfoLength(y5.rankOf()); ++i) - ASSERT_TRUE(y5.getShapeInfo()[i] == shapeExpY5[i]); + ASSERT_TRUE(y5.shapeInfo()[i] == shapeExpY5[i]); for(int i = 0; i < y5.lengthOf(); ++i) ASSERT_TRUE(y5.e(i) == buffExpY5[i]); @@ -1171,7 +1171,7 @@ TEST_F(NDArrayTest2, reshapei_1) { const bool canReshape = x.reshapei({4,7}); ASSERT_FALSE(canReshape); - ASSERT_TRUE(shape::equalsStrict(x.getShapeInfo(), shapeInfo2)); + ASSERT_TRUE(shape::equalsStrict(x.shapeInfo(), shapeInfo2)); 
delete[] buffer; } @@ -1188,7 +1188,7 @@ TEST_F(NDArrayTest2, reshapei_2) { const bool canReshape = x.reshapei({4,7}); ASSERT_FALSE(canReshape); - ASSERT_TRUE(shape::equalsStrict(x.getShapeInfo(), shapeInfo2)); + ASSERT_TRUE(shape::equalsStrict(x.shapeInfo(), shapeInfo2)); delete[] buffer; } @@ -1225,8 +1225,8 @@ TEST_F(NDArrayTest2, reduce_1) { for (int x = 0; x < 4; x++) { for (int y = 0; y < 4; y++) { Nd4jLong indices[] = {0, 0, x, y, i, j}; - Nd4jLong offset = shape::getOffset(arr6.getShapeInfo(), indices); - sum += ((double*)arr6.getBuffer())[offset]; + Nd4jLong offset = shape::getOffset(arr6.shapeInfo(), indices); + sum += ((double*)arr6.buffer())[offset]; } } exp.p(0, 0, i, j, sum); diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index 971fe452e..3421edf95 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -70,7 +70,7 @@ TEST_F(NativeOpsTests, PointerTests_1) { #ifdef __CUDABLAS__ printf("Unsupported for cuda now.\n"); #else - ::tryPointer(nullptr, x.getBuffer(), 4); + ::tryPointer(nullptr, x.buffer(), 4); #endif // auto exp = NDArrayFactory::create('c', {5, 5}); @@ -1061,10 +1061,9 @@ TEST_F(NativeOpsTests, ConcatTest_2) { auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); exp.linspace(1); Nd4jPointer datas[] = {x.buffer(), y.buffer()}; - Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()}; + Nd4jPointer shapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)y.shapeInfo()}; - ::specialConcat(extra, - 0, 2, datas, shapes, z.buffer(), z.shapeInfo(), nullptr, nullptr); + ::specialConcat(extra, 0, 2, datas, shapes, z.buffer(), z.shapeInfo(), nullptr, nullptr); // exp.printIndexedBuffer("Exp"); // z.printIndexedBuffer("Concat"); @@ -1126,8 +1125,8 @@ TEST_F(NativeOpsTests, PullRowsTest_1) { std::vector dims = {1}; - auto xTadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -1137,8 +1136,8 @@ TEST_F(NativeOpsTests, PullRowsTest_1) { OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer zBuf(z.dataBuffer()); - pullRows(nativeStart, &xBuf, x.getShapeInfo(), x.getSpecialShapeInfo(), - &zBuf, z.getShapeInfo(), z.specialShapeInfo(), + pullRows(nativeStart, &xBuf, x.shapeInfo(), x.specialShapeInfo(), + &zBuf, z.shapeInfo(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), zTadPack.platformShapeInfo(), zTadPack.platformOffsets()); @@ -1224,16 +1223,16 @@ TEST_F(NativeOpsTests, ShuffleTest_1) { exp.linspace(2,2); Nd4jPointer xList[] = {x.buffer(), x.buffer()}; Nd4jPointer dxList[] = {x.specialBuffer(), y.specialBuffer()}; - Nd4jPointer xShapeList[] = {x.shapeInfo(), y.shapeInfo()}; - Nd4jPointer dxShapeList[] = {x.specialShapeInfo(), y.specialShapeInfo()}; + Nd4jPointer xShapeList[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)y.shapeInfo()}; + Nd4jPointer dxShapeList[] = {(Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; Nd4jPointer zList[] = {z.buffer(), z.buffer()}; Nd4jPointer dzList[] = {z.specialBuffer(), z.specialBuffer()}; - Nd4jPointer zShapeList[] = {z.shapeInfo(), z.shapeInfo()}; - Nd4jPointer dzShapeList[] = {z.specialShapeInfo(), z.specialShapeInfo()}; + Nd4jPointer zShapeList[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.shapeInfo()}; + Nd4jPointer dzShapeList[] = {(Nd4jPointer)z.specialShapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; int shuffleMap[] = {1, 0, 4, 3, 2}; - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - 
Nd4jPointer zListOffset[] = {zTadPack.platformOffsets(), zTadPack.platformOffsets()}; - Nd4jPointer zListTADs[] = {zTadPack.platformShapeInfo(), zTadPack.platformShapeInfo()}; + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + Nd4jPointer zListOffset[] = {(Nd4jPointer)zTadPack.platformOffsets(), (Nd4jPointer)zTadPack.platformOffsets()}; + Nd4jPointer zListTADs[] = {(Nd4jPointer)zTadPack.platformShapeInfo(), (Nd4jPointer)zTadPack.platformShapeInfo()}; ::shuffle(nullptr, xList, xShapeList, dxList, dxShapeList, @@ -1494,11 +1493,11 @@ TEST_F(NativeOpsTests, CustomOpTest_1) { sd::ops::squeeze op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto status = ::execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1516,9 +1515,9 @@ TEST_F(NativeOpsTests, CustomOpTests_2) { NDArray::prepareSpecialUse({&z}, {&array0, &array1}); - ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.getSpecialBuffer(), array0.getSpecialShapeInfo()); - ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.getSpecialBuffer(), array1.getSpecialShapeInfo()); - ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()); + ctx.setInputArray(0, 
array0.buffer(), array0.shapeInfo(), array0.specialBuffer(), array0.specialShapeInfo()); + ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.specialBuffer(), array1.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); ASSERT_EQ(2, ctx.width()); @@ -1539,7 +1538,7 @@ TEST_F(NativeOpsTests, CalculateOutputShapeTests_1) { std::vector tArgs({}); std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); - Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; + Nd4jPointer ptrs[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weights.shapeInfo()}; #ifdef __CUDABLAS__ return; #endif @@ -1572,7 +1571,7 @@ TEST_F(NativeOpsTests, CalculateOutputShapeTests_2) { std::vector bArgsF({}); std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); - Nd4jPointer shapePtrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; + Nd4jPointer shapePtrs[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weights.shapeInfo()}; Nd4jPointer dataPtrs[] = {(Nd4jPointer)input.buffer(), (Nd4jPointer)weights.buffer()}; #ifdef __CUDABLAS__ return; diff --git a/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp b/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp index 9cb2589c1..0d73b369b 100644 --- a/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp @@ -22,9 +22,9 @@ class EqualsTest : public testing::Test { public: - Nd4jLong firstShapeBuffer[8] = {2,1,2,1,1,0,1,102}; + const Nd4jLong firstShapeBuffer[8] = {2,1,2,1,1,0,1,102}; float data[2] = {1.0f, 7.0f}; - Nd4jLong secondShapeBuffer[8] = {2,2,1,6,1,0,6,99}; + const Nd4jLong secondShapeBuffer[8] = {2,2,1,6,1,0,6,99}; float dataSecond[12] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; int opNum = 4; float extraArgs[1] = {1e-6f}; diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index 
56ca6b95e..c4c1806bd 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -1097,8 +1097,8 @@ TEST_F(RNGTests, Test_Uniform_4) { #endif TEST_F(RNGTests, test_choice_1) { - auto x = NDArrayFactory::linspace(0, 10, 11); - auto prob = NDArrayFactory::valueOf({11}, 1.0/11, 'c'); + const auto x = NDArrayFactory::linspace(0, 10, 11); + const auto prob = NDArrayFactory::valueOf({11}, 1.0/11, 'c'); auto z = NDArrayFactory::create('c', {1000}); RandomGenerator rng(119, 256); diff --git a/libnd4j/tests_cpu/layers_tests/ReduceTests.cpp b/libnd4j/tests_cpu/layers_tests/ReduceTests.cpp deleted file mode 100644 index adbe28a41..000000000 --- a/libnd4j/tests_cpu/layers_tests/ReduceTests.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by agibsonccc on 1/15/17. 
-// -#include -#include "testinclude.h" -#include -#include -#include - -class ReduceTest : public testing::Test { -public: - Nd4jLong shape[2] = {500,3}; - float x[1500] = {4.0f, 2.0f, 3.0f, 8.0f, 4.0f, 6.0f, 12.0f, 6.0f, 9.0f, 16.0f, 8.0f, 12.0f, 20.0f, 10.0f, 15.0f, 24.0f, 12.0f, 18.0f, 28.0f, 14.0f, 21.0f, 32.0f, 16.0f, 24.0f, 36.0f, 18.0f, 27.0f, 40.0f, 20.0f, 30.0f, 44.0f, 22.0f, 33.0f, 48.0f, 24.0f, 36.0f, 52.0f, 26.0f, 39.0f, 56.0f, 28.0f, 42.0f, 60.0f, 30.0f, 45.0f, 64.0f, 32.0f, 48.0f, 68.0f, 34.0f, 51.0f, 72.0f, 36.0f, 54.0f, 76.0f, 38.0f, 57.0f, 80.0f, 40.0f, 60.0f, 84.0f, 42.0f, 63.0f, 88.0f, 44.0f, 66.0f, 92.0f, 46.0f, 69.0f, 96.0f, 48.0f, 72.0f, 100.0f, 50.0f, 75.0f, 104.0f, 52.0f, 78.0f, 108.0f, 54.0f, 81.0f, 112.0f, 56.0f, 84.0f, 116.0f, 58.0f, 87.0f, 120.0f, 60.0f, 90.0f, 124.0f, 62.0f, 93.0f, 128.0f, 64.0f, 96.0f, 132.0f, 66.0f, 99.0f, 136.0f, 68.0f, 102.0f, 140.0f, 70.0f, 105.0f, 144.0f, 72.0f, 108.0f, 148.0f, 74.0f, 111.0f, 152.0f, 76.0f, 114.0f, 156.0f, 78.0f, 117.0f, 160.0f, 80.0f, 120.0f, 164.0f, 82.0f, 123.0f, 168.0f, 84.0f, 126.0f, 172.0f, 86.0f, 129.0f, 176.0f, 88.0f, 132.0f, 180.0f, 90.0f, 135.0f, 184.0f, 92.0f, 138.0f, 188.0f, 94.0f, 141.0f, 192.0f, 96.0f, 144.0f, 196.0f, 98.0f, 147.0f, 200.0f, 100.0f, 150.0f, 204.0f, 102.0f, 153.0f, 208.0f, 104.0f, 156.0f, 212.0f, 106.0f, 159.0f, 216.0f, 108.0f, 162.0f, 220.0f, 110.0f, 165.0f, 224.0f, 112.0f, 168.0f, 228.0f, 114.0f, 171.0f, 232.0f, 116.0f, 174.0f, 236.0f, 118.0f, 177.0f, 240.0f, 120.0f, 180.0f, 244.0f, 122.0f, 183.0f, 248.0f, 124.0f, 186.0f, 252.0f, 126.0f, 189.0f, 256.0f, 128.0f, 192.0f, 260.0f, 130.0f, 195.0f, 264.0f, 132.0f, 198.0f, 268.0f, 134.0f, 201.0f, 272.0f, 136.0f, 204.0f, 276.0f, 138.0f, 207.0f, 280.0f, 140.0f, 210.0f, 284.0f, 142.0f, 213.0f, 288.0f, 144.0f, 216.0f, 292.0f, 146.0f, 219.0f, 296.0f, 148.0f, 222.0f, 300.0f, 150.0f, 225.0f, 304.0f, 152.0f, 228.0f, 308.0f, 154.0f, 231.0f, 312.0f, 156.0f, 234.0f, 316.0f, 158.0f, 237.0f, 320.0f, 160.0f, 240.0f, 324.0f, 162.0f, 
243.0f, 328.0f, 164.0f, 246.0f, 332.0f, 166.0f, 249.0f, 336.0f, 168.0f, 252.0f, 340.0f, 170.0f, 255.0f, 344.0f, 172.0f, 258.0f, 348.0f, 174.0f, 261.0f, 352.0f, 176.0f, 264.0f, 356.0f, 178.0f, 267.0f, 360.0f, 180.0f, 270.0f, 364.0f, 182.0f, 273.0f, 368.0f, 184.0f, 276.0f, 372.0f, 186.0f, 279.0f, 376.0f, 188.0f, 282.0f, 380.0f, 190.0f, 285.0f, 384.0f, 192.0f, 288.0f, 388.0f, 194.0f, 291.0f, 392.0f, 196.0f, 294.0f, 396.0f, 198.0f, 297.0f, 400.0f, 200.0f, 300.0f, 404.0f, 202.0f, 303.0f, 408.0f, 204.0f, 306.0f, 412.0f, 206.0f, 309.0f, 416.0f, 208.0f, 312.0f, 420.0f, 210.0f, 315.0f, 424.0f, 212.0f, 318.0f, 428.0f, 214.0f, 321.0f, 432.0f, 216.0f, 324.0f, 436.0f, 218.0f, 327.0f, 440.0f, 220.0f, 330.0f, 444.0f, 222.0f, 333.0f, 448.0f, 224.0f, 336.0f, 452.0f, 226.0f, 339.0f, 456.0f, 228.0f, 342.0f, 460.0f, 230.0f, 345.0f, 464.0f, 232.0f, 348.0f, 468.0f, 234.0f, 351.0f, 472.0f, 236.0f, 354.0f, 476.0f, 238.0f, 357.0f, 480.0f, 240.0f, 360.0f, 484.0f, 242.0f, 363.0f, 488.0f, 244.0f, 366.0f, 492.0f, 246.0f, 369.0f, 496.0f, 248.0f, 372.0f, 500.0f, 250.0f, 375.0f, 504.0f, 252.0f, 378.0f, 508.0f, 254.0f, 381.0f, 512.0f, 256.0f, 384.0f, 516.0f, 258.0f, 387.0f, 520.0f, 260.0f, 390.0f, 524.0f, 262.0f, 393.0f, 528.0f, 264.0f, 396.0f, 532.0f, 266.0f, 399.0f, 536.0f, 268.0f, 402.0f, 540.0f, 270.0f, 405.0f, 544.0f, 272.0f, 408.0f, 548.0f, 274.0f, 411.0f, 552.0f, 276.0f, 414.0f, 556.0f, 278.0f, 417.0f, 560.0f, 280.0f, 420.0f, 564.0f, 282.0f, 423.0f, 568.0f, 284.0f, 426.0f, 572.0f, 286.0f, 429.0f, 576.0f, 288.0f, 432.0f, 580.0f, 290.0f, 435.0f, 584.0f, 292.0f, 438.0f, 588.0f, 294.0f, 441.0f, 592.0f, 296.0f, 444.0f, 596.0f, 298.0f, 447.0f, 600.0f, 300.0f, 450.0f, 604.0f, 302.0f, 453.0f, 608.0f, 304.0f, 456.0f, 612.0f, 306.0f, 459.0f, 616.0f, 308.0f, 462.0f, 620.0f, 310.0f, 465.0f, 624.0f, 312.0f, 468.0f, 628.0f, 314.0f, 471.0f, 632.0f, 316.0f, 474.0f, 636.0f, 318.0f, 477.0f, 640.0f, 320.0f, 480.0f, 644.0f, 322.0f, 483.0f, 648.0f, 324.0f, 486.0f, 652.0f, 326.0f, 489.0f, 656.0f, 328.0f, 492.0f, 
660.0f, 330.0f, 495.0f, 664.0f, 332.0f, 498.0f, 668.0f, 334.0f, 501.0f, 672.0f, 336.0f, 504.0f, 676.0f, 338.0f, 507.0f, 680.0f, 340.0f, 510.0f, 684.0f, 342.0f, 513.0f, 688.0f, 344.0f, 516.0f, 692.0f, 346.0f, 519.0f, 696.0f, 348.0f, 522.0f, 700.0f, 350.0f, 525.0f, 704.0f, 352.0f, 528.0f, 708.0f, 354.0f, 531.0f, 712.0f, 356.0f, 534.0f, 716.0f, 358.0f, 537.0f, 720.0f, 360.0f, 540.0f, 724.0f, 362.0f, 543.0f, 728.0f, 364.0f, 546.0f, 732.0f, 366.0f, 549.0f, 736.0f, 368.0f, 552.0f, 740.0f, 370.0f, 555.0f, 744.0f, 372.0f, 558.0f, 748.0f, 374.0f, 561.0f, 752.0f, 376.0f, 564.0f, 756.0f, 378.0f, 567.0f, 760.0f, 380.0f, 570.0f, 764.0f, 382.0f, 573.0f, 768.0f, 384.0f, 576.0f, 772.0f, 386.0f, 579.0f, 776.0f, 388.0f, 582.0f, 780.0f, 390.0f, 585.0f, 784.0f, 392.0f, 588.0f, 788.0f, 394.0f, 591.0f, 792.0f, 396.0f, 594.0f, 796.0f, 398.0f, 597.0f, 800.0f, 400.0f, 600.0f, 804.0f, 402.0f, 603.0f, 808.0f, 404.0f, 606.0f, 812.0f, 406.0f, 609.0f, 816.0f, 408.0f, 612.0f, 820.0f, 410.0f, 615.0f, 824.0f, 412.0f, 618.0f, 828.0f, 414.0f, 621.0f, 832.0f, 416.0f, 624.0f, 836.0f, 418.0f, 627.0f, 840.0f, 420.0f, 630.0f, 844.0f, 422.0f, 633.0f, 848.0f, 424.0f, 636.0f, 852.0f, 426.0f, 639.0f, 856.0f, 428.0f, 642.0f, 860.0f, 430.0f, 645.0f, 864.0f, 432.0f, 648.0f, 868.0f, 434.0f, 651.0f, 872.0f, 436.0f, 654.0f, 876.0f, 438.0f, 657.0f, 880.0f, 440.0f, 660.0f, 884.0f, 442.0f, 663.0f, 888.0f, 444.0f, 666.0f, 892.0f, 446.0f, 669.0f, 896.0f, 448.0f, 672.0f, 900.0f, 450.0f, 675.0f, 904.0f, 452.0f, 678.0f, 908.0f, 454.0f, 681.0f, 912.0f, 456.0f, 684.0f, 916.0f, 458.0f, 687.0f, 920.0f, 460.0f, 690.0f, 924.0f, 462.0f, 693.0f, 928.0f, 464.0f, 696.0f, 932.0f, 466.0f, 699.0f, 936.0f, 468.0f, 702.0f, 940.0f, 470.0f, 705.0f, 944.0f, 472.0f, 708.0f, 948.0f, 474.0f, 711.0f, 952.0f, 476.0f, 714.0f, 956.0f, 478.0f, 717.0f, 960.0f, 480.0f, 720.0f, 964.0f, 482.0f, 723.0f, 968.0f, 484.0f, 726.0f, 972.0f, 486.0f, 729.0f, 976.0f, 488.0f, 732.0f, 980.0f, 490.0f, 735.0f, 984.0f, 492.0f, 738.0f, 988.0f, 494.0f, 741.0f, 992.0f, 
496.0f, 744.0f, 996.0f, 498.0f, 747.0f, 1000.0f, 500.0f, 750.0f, 1004.0f, 502.0f, 753.0f, 1008.0f, 504.0f, 756.0f, 1012.0f, 506.0f, 759.0f, 1016.0f, 508.0f, 762.0f, 1020.0f, 510.0f, 765.0f, 1024.0f, 512.0f, 768.0f, 1028.0f, 514.0f, 771.0f, 1032.0f, 516.0f, 774.0f, 1036.0f, 518.0f, 777.0f, 1040.0f, 520.0f, 780.0f, 1044.0f, 522.0f, 783.0f, 1048.0f, 524.0f, 786.0f, 1052.0f, 526.0f, 789.0f, 1056.0f, 528.0f, 792.0f, 1060.0f, 530.0f, 795.0f, 1064.0f, 532.0f, 798.0f, 1068.0f, 534.0f, 801.0f, 1072.0f, 536.0f, 804.0f, 1076.0f, 538.0f, 807.0f, 1080.0f, 540.0f, 810.0f, 1084.0f, 542.0f, 813.0f, 1088.0f, 544.0f, 816.0f, 1092.0f, 546.0f, 819.0f, 1096.0f, 548.0f, 822.0f, 1100.0f, 550.0f, 825.0f, 1104.0f, 552.0f, 828.0f, 1108.0f, 554.0f, 831.0f, 1112.0f, 556.0f, 834.0f, 1116.0f, 558.0f, 837.0f, 1120.0f, 560.0f, 840.0f, 1124.0f, 562.0f, 843.0f, 1128.0f, 564.0f, 846.0f, 1132.0f, 566.0f, 849.0f, 1136.0f, 568.0f, 852.0f, 1140.0f, 570.0f, 855.0f, 1144.0f, 572.0f, 858.0f, 1148.0f, 574.0f, 861.0f, 1152.0f, 576.0f, 864.0f, 1156.0f, 578.0f, 867.0f, 1160.0f, 580.0f, 870.0f, 1164.0f, 582.0f, 873.0f, 1168.0f, 584.0f, 876.0f, 1172.0f, 586.0f, 879.0f, 1176.0f, 588.0f, 882.0f, 1180.0f, 590.0f, 885.0f, 1184.0f, 592.0f, 888.0f, 1188.0f, 594.0f, 891.0f, 1192.0f, 596.0f, 894.0f, 1196.0f, 598.0f, 897.0f, 1200.0f, 600.0f, 900.0f, 1204.0f, 602.0f, 903.0f, 1208.0f, 604.0f, 906.0f, 1212.0f, 606.0f, 909.0f, 1216.0f, 608.0f, 912.0f, 1220.0f, 610.0f, 915.0f, 1224.0f, 612.0f, 918.0f, 1228.0f, 614.0f, 921.0f, 1232.0f, 616.0f, 924.0f, 1236.0f, 618.0f, 927.0f, 1240.0f, 620.0f, 930.0f, 1244.0f, 622.0f, 933.0f, 1248.0f, 624.0f, 936.0f, 1252.0f, 626.0f, 939.0f, 1256.0f, 628.0f, 942.0f, 1260.0f, 630.0f, 945.0f, 1264.0f, 632.0f, 948.0f, 1268.0f, 634.0f, 951.0f, 1272.0f, 636.0f, 954.0f, 1276.0f, 638.0f, 957.0f, 1280.0f, 640.0f, 960.0f, 1284.0f, 642.0f, 963.0f, 1288.0f, 644.0f, 966.0f, 1292.0f, 646.0f, 969.0f, 1296.0f, 648.0f, 972.0f, 1300.0f, 650.0f, 975.0f, 1304.0f, 652.0f, 978.0f, 1308.0f, 654.0f, 981.0f, 1312.0f, 
656.0f, 984.0f, 1316.0f, 658.0f, 987.0f, 1320.0f, 660.0f, 990.0f, 1324.0f, 662.0f, 993.0f, 1328.0f, 664.0f, 996.0f, 1332.0f, 666.0f, 999.0f, 1336.0f, 668.0f, 1002.0f, 1340.0f, 670.0f, 1005.0f, 1344.0f, 672.0f, 1008.0f, 1348.0f, 674.0f, 1011.0f, 1352.0f, 676.0f, 1014.0f, 1356.0f, 678.0f, 1017.0f, 1360.0f, 680.0f, 1020.0f, 1364.0f, 682.0f, 1023.0f, 1368.0f, 684.0f, 1026.0f, 1372.0f, 686.0f, 1029.0f, 1376.0f, 688.0f, 1032.0f, 1380.0f, 690.0f, 1035.0f, 1384.0f, 692.0f, 1038.0f, 1388.0f, 694.0f, 1041.0f, 1392.0f, 696.0f, 1044.0f, 1396.0f, 698.0f, 1047.0f, 1400.0f, 700.0f, 1050.0f, 1404.0f, 702.0f, 1053.0f, 1408.0f, 704.0f, 1056.0f, 1412.0f, 706.0f, 1059.0f, 1416.0f, 708.0f, 1062.0f, 1420.0f, 710.0f, 1065.0f, 1424.0f, 712.0f, 1068.0f, 1428.0f, 714.0f, 1071.0f, 1432.0f, 716.0f, 1074.0f, 1436.0f, 718.0f, 1077.0f, 1440.0f, 720.0f, 1080.0f, 1444.0f, 722.0f, 1083.0f, 1448.0f, 724.0f, 1086.0f, 1452.0f, 726.0f, 1089.0f, 1456.0f, 728.0f, 1092.0f, 1460.0f, 730.0f, 1095.0f, 1464.0f, 732.0f, 1098.0f, 1468.0f, 734.0f, 1101.0f, 1472.0f, 736.0f, 1104.0f, 1476.0f, 738.0f, 1107.0f, 1480.0f, 740.0f, 1110.0f, 1484.0f, 742.0f, 1113.0f, 1488.0f, 744.0f, 1116.0f, 1492.0f, 746.0f, 1119.0f, 1496.0f, 748.0f, 1122.0f, 1500.0f, 750.0f, 1125.0f, 1504.0f, 752.0f, 1128.0f, 1508.0f, 754.0f, 1131.0f, 1512.0f, 756.0f, 1134.0f, 1516.0f, 758.0f, 1137.0f, 1520.0f, 760.0f, 1140.0f, 1524.0f, 762.0f, 1143.0f, 1528.0f, 764.0f, 1146.0f, 1532.0f, 766.0f, 1149.0f, 1536.0f, 768.0f, 1152.0f, 1540.0f, 770.0f, 1155.0f, 1544.0f, 772.0f, 1158.0f, 1548.0f, 774.0f, 1161.0f, 1552.0f, 776.0f, 1164.0f, 1556.0f, 778.0f, 1167.0f, 1560.0f, 780.0f, 1170.0f, 1564.0f, 782.0f, 1173.0f, 1568.0f, 784.0f, 1176.0f, 1572.0f, 786.0f, 1179.0f, 1576.0f, 788.0f, 1182.0f, 1580.0f, 790.0f, 1185.0f, 1584.0f, 792.0f, 1188.0f, 1588.0f, 794.0f, 1191.0f, 1592.0f, 796.0f, 1194.0f, 1596.0f, 798.0f, 1197.0f, 1600.0f, 800.0f, 1200.0f, 1604.0f, 802.0f, 1203.0f, 1608.0f, 804.0f, 1206.0f, 1612.0f, 806.0f, 1209.0f, 1616.0f, 808.0f, 1212.0f, 1620.0f, 
810.0f, 1215.0f, 1624.0f, 812.0f, 1218.0f, 1628.0f, 814.0f, 1221.0f, 1632.0f, 816.0f, 1224.0f, 1636.0f, 818.0f, 1227.0f, 1640.0f, 820.0f, 1230.0f, 1644.0f, 822.0f, 1233.0f, 1648.0f, 824.0f, 1236.0f, 1652.0f, 826.0f, 1239.0f, 1656.0f, 828.0f, 1242.0f, 1660.0f, 830.0f, 1245.0f, 1664.0f, 832.0f, 1248.0f, 1668.0f, 834.0f, 1251.0f, 1672.0f, 836.0f, 1254.0f, 1676.0f, 838.0f, 1257.0f, 1680.0f, 840.0f, 1260.0f, 1684.0f, 842.0f, 1263.0f, 1688.0f, 844.0f, 1266.0f, 1692.0f, 846.0f, 1269.0f, 1696.0f, 848.0f, 1272.0f, 1700.0f, 850.0f, 1275.0f, 1704.0f, 852.0f, 1278.0f, 1708.0f, 854.0f, 1281.0f, 1712.0f, 856.0f, 1284.0f, 1716.0f, 858.0f, 1287.0f, 1720.0f, 860.0f, 1290.0f, 1724.0f, 862.0f, 1293.0f, 1728.0f, 864.0f, 1296.0f, 1732.0f, 866.0f, 1299.0f, 1736.0f, 868.0f, 1302.0f, 1740.0f, 870.0f, 1305.0f, 1744.0f, 872.0f, 1308.0f, 1748.0f, 874.0f, 1311.0f, 1752.0f, 876.0f, 1314.0f, 1756.0f, 878.0f, 1317.0f, 1760.0f, 880.0f, 1320.0f, 1764.0f, 882.0f, 1323.0f, 1768.0f, 884.0f, 1326.0f, 1772.0f, 886.0f, 1329.0f, 1776.0f, 888.0f, 1332.0f, 1780.0f, 890.0f, 1335.0f, 1784.0f, 892.0f, 1338.0f, 1788.0f, 894.0f, 1341.0f, 1792.0f, 896.0f, 1344.0f, 1796.0f, 898.0f, 1347.0f, 1800.0f, 900.0f, 1350.0f, 1804.0f, 902.0f, 1353.0f, 1808.0f, 904.0f, 1356.0f, 1812.0f, 906.0f, 1359.0f, 1816.0f, 908.0f, 1362.0f, 1820.0f, 910.0f, 1365.0f, 1824.0f, 912.0f, 1368.0f, 1828.0f, 914.0f, 1371.0f, 1832.0f, 916.0f, 1374.0f, 1836.0f, 918.0f, 1377.0f, 1840.0f, 920.0f, 1380.0f, 1844.0f, 922.0f, 1383.0f, 1848.0f, 924.0f, 1386.0f, 1852.0f, 926.0f, 1389.0f, 1856.0f, 928.0f, 1392.0f, 1860.0f, 930.0f, 1395.0f, 1864.0f, 932.0f, 1398.0f, 1868.0f, 934.0f, 1401.0f, 1872.0f, 936.0f, 1404.0f, 1876.0f, 938.0f, 1407.0f, 1880.0f, 940.0f, 1410.0f, 1884.0f, 942.0f, 1413.0f, 1888.0f, 944.0f, 1416.0f, 1892.0f, 946.0f, 1419.0f, 1896.0f, 948.0f, 1422.0f, 1900.0f, 950.0f, 1425.0f, 1904.0f, 952.0f, 1428.0f, 1908.0f, 954.0f, 1431.0f, 1912.0f, 956.0f, 1434.0f, 1916.0f, 958.0f, 1437.0f, 1920.0f, 960.0f, 1440.0f, 1924.0f, 962.0f, 1443.0f, 
1928.0f, 964.0f, 1446.0f, 1932.0f, 966.0f, 1449.0f, 1936.0f, 968.0f, 1452.0f, 1940.0f, 970.0f, 1455.0f, 1944.0f, 972.0f, 1458.0f, 1948.0f, 974.0f, 1461.0f, 1952.0f, 976.0f, 1464.0f, 1956.0f, 978.0f, 1467.0f, 1960.0f, 980.0f, 1470.0f, 1964.0f, 982.0f, 1473.0f, 1968.0f, 984.0f, 1476.0f, 1972.0f, 986.0f, 1479.0f, 1976.0f, 988.0f, 1482.0f, 1980.0f, 990.0f, 1485.0f, 1984.0f, 992.0f, 1488.0f, 1988.0f, 994.0f, 1491.0f, 1992.0f, 996.0f, 1494.0f, 1996.0f, 998.0f, 1497.0f, 2000.0f, 1000.0f, 1500.0f}; - float result[1500] = {0.f}; - int dimension[1] = {0}; - std::vector dim = {0}; - int dimensionLength = 1; - float theoreticalMin[3] = {4.f, 2.f, 3.f}; - float theoreticalMax[3] = {2000.00f, 1000.00f, 1500.00f}; - float theoreticalRange[3] = {1996.00f, 998.00f, 1497.00f}; -}; - -class StdTest : public testing::Test { -public: - Nd4jLong examplesShape[4] = {10,5,10,15}; - int dimensionsForStd[3] = {0,2,3}; - std::vector dimsForStd = {0,2,3}; - int dimensionLength = 3; - //standard deviation - int opNum = 1; - float x[7500] ={0.5786382f, 0.16236664f, 0.069020785f, 0.9840061f, 0.941816f, 0.76720303f, 0.7794372f, 0.46979624f, 0.73381734f, 0.9957244f, 0.6167372f, 0.53088397f, 0.28015637f, 0.826945f, 0.83352476f, 0.66504276f, 0.5793391f, 0.47484478f, 0.7076381f, 0.49456358f, 0.62396896f, 0.53332835f, 0.6388812f, 0.68836075f, 0.26663998f, 0.0014623206f, 0.19409843f, 0.56639415f, 0.98213744f, 0.68497056f, 0.867037f, 0.76840234f, 0.318186f, 0.28759065f, 0.11965875f, 0.53291357f, 0.53767395f, 0.55705845f, 0.7467155f, 0.1575149f, 0.18076386f, 0.8174763f, 0.22883898f, 0.5071535f, 0.86735153f, 0.9635827f, 0.24558435f, 0.15767147f, 0.458882f, 0.71102697f, 0.21914826f, 0.16241662f, 0.27248728f, 0.89015275f, 0.71070856f, 0.55088985f, 0.98992974f, 0.70927286f, 0.9261268f, 0.50781846f, 0.62151235f, 0.4590896f, 0.7487442f, 0.21744072f, 0.2636398f, 0.084352165f, 0.46951914f, 0.383644f, 0.6749645f, 0.24111961f, 0.83259743f, 0.05546627f, 0.4790621f, 0.68884027f, 0.90992177f, 0.23907907f, 0.5342047f, 
0.221003f, 0.29615387f, 0.43343517f, 0.16554528f, 0.73144174f, 0.52923626f, 0.10688303f, 0.78197056f, 0.39259177f, 0.43832788f, 0.052234255f, 0.5795483f, 0.97033966f, 0.7392455f, 0.086584255f, 0.9092887f, 0.9402065f, 0.9126419f, 0.44749174f, 0.20514569f, 0.8749829f, 0.30917913f, 0.10170506f, 0.37034252f, 0.7427814f, 0.5497875f, 0.3116048f, 0.12112484f, 0.07918618f, 0.6003074f, 0.6188079f, 0.6292188f, 0.26580265f, 0.42029652f, 0.9863358f, 0.41489154f, 0.23757206f, 0.30395788f, 0.75231904f, 0.76751274f, 0.6324773f, 0.3231405f, 0.5016677f, 0.86029065f, 0.575702f, 0.7473972f, 0.118974194f, 0.115586124f, 0.62481487f, 0.91101325f, 0.6137756f, 0.71462154f, 0.995567f, 0.93439484f, 0.37260458f, 0.6033152f, 0.3444346f, 0.91579247f, 0.7452442f, 0.97466874f, 0.6299154f, 0.35426098f, 0.50121397f, 0.14155711f, 0.78726757f, 0.028531995f, 0.8435531f, 0.6444501f, 0.8826095f, 0.25354537f, 0.5547923f, 0.99555415f, 0.8430975f, 246.29712f, 253.4231f, 282.26755f, 215.6161f, 251.57019f, 239.20515f, 296.2021f, 234.32518f, 278.9852f, 235.4248f, 238.70155f, 256.9956f, 212.62695f, 288.38763f, 231.21237f, 284.80396f, 261.86835f, 223.92522f, 205.86221f, 234.742f, 262.11407f, 298.1942f, 242.60652f, 238.83704f, 251.6588f, 267.23315f, 294.4865f, 223.47488f, 259.24976f, 251.82695f, 265.01166f, 234.65732f, 265.1853f, 202.15352f, 244.42313f, 253.90427f, 212.09233f, 227.62961f, 237.77951f, 261.36838f, 234.32147f, 240.81522f, 273.62595f, 221.19333f, 284.11353f, 216.00859f, 284.36948f, 243.90376f, 282.61584f, 256.97165f, 275.08722f, 253.8055f, 265.1405f, 298.87567f, 223.393f, 288.02148f, 287.26102f, 276.36237f, 290.52777f, 299.57062f, 224.73566f, 290.82623f, 231.3513f, 238.51828f, 230.74028f, 224.97539f, 290.11844f, 238.00816f, 290.39606f, 291.32538f, 272.94766f, 211.88446f, 291.66742f, 210.34077f, 285.62628f, 246.31918f, 283.68738f, 282.34418f, 223.43613f, 245.08679f, 235.22693f, 246.01146f, 224.03375f, 280.5359f, 226.01413f, 262.18884f, 237.87335f, 238.89404f, 259.04294f, 202.59842f, 294.69302f, 
209.01956f, 244.75763f, 264.3232f, 293.4627f, 287.69165f, 236.79088f, 282.37012f, 222.24211f, 293.5885f, 249.6388f, 273.91916f, 215.40356f, 255.45584f, 268.4702f, 275.81577f, 259.25064f, 224.95108f, 250.37906f, 267.89093f, 256.31766f, 227.89124f, 204.10915f, 263.38596f, 213.62708f, 218.84116f, 289.00494f, 216.93646f, 200.29439f, 284.1103f, 216.20671f, 260.57642f, 248.57745f, 241.73776f, 244.7205f, 286.86218f, 206.42664f, 204.06395f, 216.60626f, 224.02377f, 219.4697f, 287.2509f, 246.91132f, 289.83777f, 292.73767f, 202.73048f, 206.4165f, 294.0605f, 276.23276f, 288.51318f, 279.45175f, 253.69833f, 281.3311f, 249.44318f, 287.76288f, 262.2878f, 238.2247f, 203.41438f, 208.8359f, 274.0062f, -9.999092f, -9.99934f, -9.999794f, -9.999654f, -9.999987f, -9.999574f, -9.99965f, -9.999892f, -9.999203f, -9.999798f, -9.999658f, -9.999974f, -9.999982f, -9.999003f, -9.999369f, -9.999311f, -9.999708f, -9.999327f, -9.999302f, -9.999419f, -9.999553f, -9.9991665f, -9.999842f, -9.9991665f, -9.999702f, -9.999081f, -9.9993725f, -9.999735f, -9.999399f, -9.999073f, -9.999045f, -9.999458f, -9.99971f, -9.999414f, -9.999165f, -9.999782f, -9.999417f, -9.999513f, -9.999398f, -9.999933f, -9.999367f, -9.999933f, -9.999302f, -9.999572f, -9.999926f, -9.999371f, -9.999746f, -9.999628f, -9.9995165f, -9.999816f, -9.9998255f, -9.999983f, -9.999482f, -9.99976f, -9.999302f, -9.999825f, -9.999026f, -9.999029f, -9.999147f, -9.9995f, -9.999214f, -9.999216f, -9.999818f, -9.999334f, -9.999354f, -9.999414f, -9.999564f, -9.99962f, -9.999615f, -9.999496f, -9.999803f, -9.999454f, -9.999789f, -9.999615f, -9.999473f, -9.999701f, -9.999164f, -9.999112f, -9.9991865f, -9.999779f, -9.999639f, -9.999739f, -9.999949f, -9.999005f, -9.999157f, -9.999394f, -9.999148f, -9.999729f, -9.999721f, -9.999721f, -9.999678f, -9.999215f, -9.99921f, -9.999848f, -9.999702f, -9.999167f, -9.999995f, -9.999203f, -9.999381f, -9.999537f, -9.999643f, -9.999887f, -9.999234f, -9.999761f, -9.999863f, -9.9999275f, -9.99965f, -9.999459f, -9.999674f, 
-9.999408f, -9.999761f, -9.999802f, -9.999465f, -9.999648f, -9.999447f, -9.999051f, -9.999212f, -9.999952f, -9.999188f, -9.999153f, -9.999513f, -9.999785f, -9.999538f, -9.999458f, -9.999802f, -9.999176f, -9.999821f, -9.999529f, -9.999089f, -9.999206f, -9.999853f, -9.999218f, -9.999763f, -9.999283f, -9.999687f, -9.999333f, -9.9996195f, -9.999563f, -9.99978f, -9.999214f, -9.999417f, -9.999161f, -9.999615f, -9.999529f, -9.999715f, -9.99965f, -9.999793f, -9.999159f, -9.999804f, -9.999826f, 0.25581473f, 0.011998488f, 0.19125576f, 0.26596868f, 0.21618238f, 0.7962773f, 0.8030581f, 0.7543603f, 0.37575766f, 0.764879f, 0.10974313f, 0.06437898f, 0.26072952f, 0.30300763f, 0.029973997f, 0.025493756f, 0.21206349f, 0.7668091f, 0.53181326f, 0.36343664f, 0.5012292f, 0.17466855f, 0.188394f, 0.73864985f, 0.4810524f, 0.42596745f, 0.17328279f, 0.2649388f, 0.5691122f, 0.6979966f, 0.40108117f, 0.680846f, 0.8891427f, 0.36562127f, 0.5258834f, 0.02162829f, 0.34679192f, 0.51932955f, 0.5934363f, 0.8976068f, 0.17759448f, 0.84487504f, 0.08563967f, 0.8079017f, 0.53375924f, 0.5292685f, 0.7386051f, 0.84675163f, 0.52025354f, 0.402771f, 0.25339442f, 0.020660425f, 0.8532977f, 0.26857603f, 0.08696012f, 0.30953142f, 0.05712433f, 0.52134746f, 0.668039f, 0.8811842f, 0.84066904f, 0.5784957f, 0.13710192f, 0.25812075f, 0.12778813f, 0.6114538f, 0.68826395f, 0.6296169f, 0.050615292f, 0.60265064f, 0.59383374f, 0.50250226f, 0.5533876f, 0.80024f, 0.15964289f, 0.44098398f, 0.3639451f, 0.9836441f, 0.59009975f, 0.42786047f, 0.66358715f, 0.77674544f, 0.96205765f, 0.30722687f, 0.07275952f, 0.8073388f, 0.8589582f, 0.1655514f, 0.942791f, 0.7421209f, 0.33589354f, 0.031047517f, 0.2333922f, 0.32696965f, 0.06680667f, 0.43655157f, 0.60084665f, 0.924222f, 0.5181169f, 0.8633322f, 0.07042168f, 0.3576994f, 0.23789743f, 0.98523647f, 0.35718223f, 0.09434685f, 0.7895948f, 0.6365413f, 0.7331945f, 0.8172492f, 0.2427676f, 0.23792028f, 0.7375947f, 0.72343403f, 0.47277793f, 0.53527576f, 0.30485073f, 0.64892334f, 0.15171374f, 
0.8003455f, 0.9694175f, 0.3611101f, 0.8037058f, 0.7925937f, 0.18575527f, 0.81588566f, 0.094868064f, 0.9775748f, 0.6791609f, 0.26662946f, 0.18830737f, 0.595805f, 0.49300948f, 0.9033739f, 0.663468f, 0.3000145f, 0.57594025f, 0.8624458f, 0.18944798f, 0.65868706f, 0.35742447f, 0.099066f, 0.2832066f, 0.6912541f, 0.24243657f, 0.9277832f, 0.64250916f, 0.9440414f, 0.2378183f, 0.055244252f, 0.76272976f, 0.67200613f, 0.49664533f, 0.5904184f, 0.17577513f, 0.7822792f, 0.61906105f, 0.6896018f, 0.873862f, 0.9968526f, 0.4556378f, 0.87811166f, 0.86004007f, 0.41853464f, 0.5995596f, 0.40827745f, 0.28851208f, 0.5202819f, 0.19265123f, 0.92939705f, 0.70689267f, 0.11201124f, 0.98409003f, 0.18970507f, 0.7182739f, 0.5939693f, 0.05994234f, 0.021280153f, 0.14513102f, 0.40208468f, 0.22757782f, 0.23340172f, 0.3629895f, 0.13855931f, 0.78980845f, 0.8154337f, 0.9686873f, 0.03149764f, 0.027852392f, 0.7822175f, 0.3670333f, 0.78024536f, 0.44308364f, 0.7551719f, 0.7001006f, 0.99656695f, 0.7096177f, 0.6460425f, 0.3090078f, 0.3817309f, 0.75382084f, 0.24751845f, 0.9919141f, 0.8101396f, 0.72690064f, 0.58389014f, 0.13931125f, 0.4260997f, 0.19920675f, 0.29389992f, 0.22849065f, 0.054567583f, 0.0286403f, 0.68753535f, 0.6393382f, 0.83747303f, 0.43944475f, 0.16854768f, 0.659512f, 0.25002992f, 0.015794016f, 0.9449101f, 0.7541057f, 0.945847f, 0.97127223f, 0.59012526f, 0.04557803f, 0.114047214f, 0.7673727f, 0.4418709f, 0.1393514f, 0.41973236f, 0.5081946f, 0.282509f, 0.30676988f, 0.2546641f, 0.6687642f, 0.31170198f, 0.43019253f, 0.81878066f, 0.9186455f, 0.787344f, 0.119964f, 0.48843786f, 0.26080957f, 0.43372f, 0.7264191f, 0.7316731f, 0.52168936f, 0.3228819f, 0.5850103f, 0.58188486f, 0.5764724f, 0.85721606f, 0.0048306463f, 0.9518531f, 0.51219267f, 0.9845728f, 0.72086376f, 0.21577734f, 0.14109355f, 0.16697218f, 0.70463514f, 0.54204077f, 0.5187638f, 0.08548192f, 0.021048365f, 0.8778848f, 0.19857538f, 0.04883652f, 0.7117264f, 0.10805124f, 0.49904156f, 0.22152025f, 0.6800811f, 0.17553183f, 0.637131f, 0.4801609f, 
0.5453409f, 0.25295126f, 0.48752138f, 0.5394039f, 0.7378793f, 0.89846796f, 0.30146414f, 0.21664028f, 0.27394173f, 0.022367671f, 0.9892407f, 0.19886415f, 0.41262844f, 0.30491787f, 0.49006933f, 0.81182134f, 0.673692f, 0.2412966f, 0.17482981f, 0.5432391f, 0.8450185f, 0.69215244f, 0.70803803f, 0.04421597f, 0.29316452f, 0.21701345f, 0.111889146f, 0.85679144f, 0.92165715f, 0.093697235f, 0.3446256f, 0.46299627f, 0.4249108f, 0.7948484f, 0.19556557f, 0.7571282f, 0.01646797f, 0.8894279f, 0.19658394f, 0.26087877f, 0.70531607f, 0.6966002f, 0.5969214f, 0.5227917f, 0.36881882f, 0.9858828f, 0.23796275f, 0.4213183f, 0.48533306f, 0.44627303f, 0.15690878f, 0.6434008f, 0.41254497f, 0.99109685f, 0.20189007f, 0.5941583f, 0.18635221f, 0.6158875f, 0.42995065f, 0.027945405f, 0.8306056f, 0.3877798f, 0.982836f, 0.49713424f, 0.91654354f, 0.6155134f, 0.814247f, 0.3077533f, 0.22847779f, 0.88966215f, 0.8747604f, 0.41640446f, 0.9716281f, 0.18517044f, 0.033389226f, 0.026901966f, 0.41404715f, 0.7838385f, 0.9055906f, 0.63307714f, 0.6555554f, 0.61210406f, 0.8100642f, 0.7994826f, 0.50656956f, 0.7002863f, 0.122354865f, 0.73366094f, 0.92528874f, 0.50401425f, 0.3586611f, 0.3649591f, 0.8697877f, 0.09153776f, 0.56987906f, 0.4228477f, 0.72918344f, 0.21651368f, 0.273237f, 0.1320687f, 0.256684f, 0.3676141f, 0.1802598f, 0.8279442f, 0.5993243f, 0.99537796f, 0.70956576f, 0.6580005f, 0.9079618f, 0.06857852f, 0.33703786f, 0.42991522f, 0.46704793f, 0.30789334f, 0.97041386f, 0.067041285f, 0.48089835f, 0.23312177f, 0.09135661f, 0.6173484f, 0.47475886f, 0.9562112f, 0.99144304f, 0.50248766f, 0.5567772f, 0.6791836f, 0.5094131f, 0.5138229f, 0.9128905f, 0.5559054f, 0.28739175f, 0.5442868f, 0.1325101f, 0.039360367f, 0.9252663f, 0.30213857f, 0.5769297f, 0.24732989f, 0.7464911f, 0.16295283f, 0.22247133f, 0.6684257f, 0.30283514f, 0.31917402f, 0.2872067f, 0.41503724f, 0.81451225f, 0.03269196f, 0.820269f, 0.5588804f, 0.26527935f, 0.6293965f, 0.40942776f, 0.6733743f, 0.5519464f, 0.7554137f, 0.28561452f, 0.19815777f, 
0.14119685f, 0.8302559f, 0.47257373f, 0.45373413f, 0.26654762f, 0.51656854f, 0.16259237f, 0.8570836f, 0.6660475f, 0.9988463f, 0.2234983f, 0.29011694f, 0.19929285f, 0.87688833f, 288.208f, 299.0334f, 234.06802f, 288.59332f, 285.71396f, 208.14828f, 243.33327f, 263.37518f, 222.83241f, 267.64508f, 236.68651f, 240.05948f, 241.17122f, 227.03455f, 229.1796f, 231.68953f, 267.16785f, 205.02823f, 264.77625f, 237.24646f, 249.54239f, 232.01376f, 208.56255f, 210.85419f, 239.4313f, 285.38928f, 207.99615f, 219.70026f, 286.46414f, 259.6215f, 264.591f, 240.25525f, 212.3435f, 223.9664f, 258.98178f, 278.75095f, 267.05542f, 200.13255f, 271.41925f, 235.1554f, 277.16098f, 235.27489f, 218.60641f, 299.13928f, 237.70187f, 218.95384f, 233.26817f, 239.93466f, 210.01537f, 237.0251f, 236.5253f, 272.3498f, 248.93144f, 249.78705f, 202.80908f, 296.07632f, 248.54794f, 228.7884f, 238.64236f, 214.01402f, 231.23134f, 243.41833f, 254.53098f, 229.02164f, 210.59755f, 268.93982f, 277.32697f, 297.97763f, 259.46844f, 229.38896f, 288.10034f, 251.99005f, 273.70062f, 277.30673f, 212.11809f, 205.43094f, 270.62506f, 244.42522f, 280.7068f, 252.17372f, 221.36655f, 231.1006f, 224.59811f, 239.97418f, 257.73175f, 290.97693f, 205.1341f, 217.40971f, 275.88208f, 201.61108f, 280.00003f, 289.00586f, 267.0944f, 231.31201f, 211.03806f, 213.06203f, 269.1713f, 265.57556f, 248.42055f, 209.8977f, 286.6746f, 221.91562f, 215.06145f, 229.53949f, 269.93027f, 276.57254f, 250.9029f, 288.37958f, 228.52266f, 267.0228f, 297.99734f, 214.70332f, 253.89653f, 231.25943f, 204.15068f, 276.6967f, 213.42561f, 222.77573f, 246.64607f, 206.99153f, 251.96185f, 275.08154f, 218.24387f, 211.39914f, 266.65384f, 298.70865f, 287.00455f, 227.15556f, 247.37427f, 213.96188f, 272.59308f, 224.01898f, 235.20276f, 253.20197f, 209.47455f, 210.07729f, 261.2526f, 239.28952f, 219.84111f, 211.5859f, 263.7782f, 225.82002f, 209.55066f, 225.2778f, 276.13922f, 208.97437f, 274.6557f, 297.25998f, 287.32483f, 205.43816f, -9.999689f, -9.999144f, -9.999799f, -9.999373f, 
-9.999519f, -9.9993925f, -9.999233f, -9.999142f, -9.99984f, -9.999262f, -9.999546f, -9.999872f, -9.999391f, -9.999968f, -9.999606f, -9.999656f, -9.999715f, -9.99956f, -9.999932f, -9.999743f, -9.999814f, -9.999712f, -9.999522f, -9.999528f, -9.999384f, -9.999094f, -9.999038f, -9.999751f, -9.999586f, -9.99945f, -9.999128f, -9.999073f, -9.999791f, -9.999677f, -9.9991865f, -9.99909f, -9.999762f, -9.999218f, -9.9995575f, -9.999647f, -9.999325f, -9.999892f, -9.999989f, -9.999758f, -9.999248f, -9.999668f, -9.999531f, -9.999084f, -9.999631f, -9.999403f, -9.999865f, -9.999935f, -9.9991f, -9.999564f, -9.99925f, -9.9990425f, -9.999887f, -9.999345f, -9.999006f, -9.999103f, -9.999717f, -9.99932f, -9.999787f, -9.999386f, -9.999753f, -9.999903f, -9.999105f, -9.999969f, -9.999686f, -9.999083f, -9.99972f, -9.999545f, -9.999551f, -9.999687f, -9.999285f, -9.999309f, -9.999812f, -9.99978f, -9.999336f, -9.999835f, -9.999004f, -9.999377f, -9.999526f, -9.999481f, -9.999829f, -9.999929f, -9.999993f, -9.999933f, -9.999451f, -9.999956f, -9.999661f, -9.999863f, -9.9993305f, -9.999771f, -9.999426f, -9.999976f, -9.999994f, -9.999831f, -9.99988f, -9.999162f, -9.999056f, -9.999193f, -9.999941f, -9.999949f, -9.999971f, -9.999258f, -9.999011f, -9.999707f, -9.999535f, -9.999201f, -9.9995985f, -9.999823f, -9.999531f, -9.999698f, -9.999328f, -9.999958f, -9.999032f, -9.999576f, -9.999392f, -9.999067f, -9.99902f, -9.999045f, -9.99983f, -9.999011f, -9.999783f, -9.999335f, -9.999907f, -9.999681f, -9.999122f, -9.999256f, -9.999235f, -9.999991f, -9.999099f, -9.999523f, -9.999284f, -9.999148f, -9.999722f, -9.999268f, -9.999101f, -9.99915f, -9.999277f, -9.999724f, -9.999198f, -9.999702f, -9.999371f, -9.999346f, -9.999348f, -9.999846f, -9.99938f, -9.999386f, 0.9152095f, 0.9171647f, 0.8286799f, 0.06623944f, 0.4663288f, 0.6674705f, 0.88702863f, 0.26388377f, 0.38012853f, 0.22043897f, 0.34161663f, 0.7549241f, 0.89839345f, 0.57267684f, 0.46196744f, 0.40692735f, 0.63130325f, 0.46858534f, 0.25790846f, 0.5064126f, 
0.6745789f, 0.815519f, 0.3279563f, 0.06752282f, 0.32830805f, 0.9456376f, 0.99969417f, 0.33946416f, 0.09058472f, 0.80821294f, 0.4096069f, 0.04731839f, 0.1274211f, 0.26724407f, 0.0013231506f, 0.89294916f, 0.14734322f, 0.3986316f, 0.44342554f, 0.37137577f, 0.55341625f, 0.49281976f, 0.7313272f, 0.2879761f, 0.20376818f, 0.9424636f, 0.21195652f, 0.22167233f, 0.5677064f, 0.36845347f, 0.079733446f, 0.6180234f, 0.52336746f, 0.2760374f, 0.07769606f, 0.637682f, 0.085176565f, 0.16043824f, 0.6679482f, 0.8272858f, 0.6635249f, 0.28023627f, 0.9216744f, 0.5184493f, 0.33986536f, 0.83903545f, 0.6198479f, 0.7963929f, 0.63605565f, 0.41838124f, 0.26928508f, 0.05648084f, 0.6071852f, 0.3672051f, 0.54514945f, 0.46253535f, 0.595289f, 0.2197304f, 0.56575435f, 0.33570454f, 0.12949312f, 0.009017748f, 0.82104915f, 0.31175017f, 0.46786937f, 0.9008307f, 0.059177548f, 0.21651942f, 0.58483404f, 0.13534085f, 0.2563066f, 0.98585606f, 0.3444204f, 0.30529618f, 0.9550007f, 0.010194158f, 0.44460547f, 0.4293112f, 0.020983648f, 0.83968806f, 0.5455774f, 0.9872851f, 0.27159318f, 0.16667603f, 0.3916389f, 0.10710736f, 0.70841914f, 0.23437801f, 0.78563285f, 0.25137436f, 0.61097264f, 0.41494665f, 0.20036837f, 0.26286733f, 0.5676644f, 0.2662849f, 0.80940986f, 0.7974582f, 0.5003222f, 0.29910246f, 0.1976132f, 0.30444196f, 0.073145f, 0.68550193f, 0.28199244f, 0.7541317f, 0.11088511f, 0.34996328f, 0.7452604f, 0.42252555f, 0.21781512f, 0.96444f, 0.15884762f, 0.99850196f, 0.5329689f, 0.33807343f, 0.2701225f, 0.6472552f, 0.18246143f, 0.32816347f, 0.81063986f, 0.90712345f, 0.69261926f, 0.44346964f, 0.08311381f, 0.019193182f, 0.3513845f, 0.38967726f, 0.68732834f, 0.45974445f, 0.79513454f, 0.92073804f, 0.61770153f, 0.15796295f, 0.34206834f, 0.61403716f, 0.50911576f, 0.09764764f, 0.4105753f, 0.4610053f, 0.23835297f, 0.7583669f, 0.26223376f, 0.76859593f, 0.82576513f, 0.91628957f, 0.95209956f, 0.34038633f, 0.2481594f, 0.5448205f, 0.94344336f, 0.5867557f, 0.44679952f, 0.35732326f, 0.15309544f, 0.83495915f, 0.8223747f, 
0.7383799f, 0.2723741f, 0.37363288f, 0.32874116f, 0.5468127f, 0.5836204f, 0.680963f, 0.28229877f, 0.440675f, 0.058448013f, 0.26188472f, 0.8043764f, 0.92689526f, 0.26310128f, 0.6354866f, 0.915084f, 0.45643163f, 0.87117124f, 0.9500249f, 0.1889253f, 0.5461343f, 0.47915125f, 0.43820933f, 0.13977474f, 0.8290898f, 0.30484903f, 0.5062122f, 0.33160135f, 0.62606835f, 0.65262437f, 0.23008808f, 0.4257683f, 0.13102946f, 0.21824555f, 0.8722663f, 0.26695797f, 0.028245918f, 0.77160543f, 0.10392295f, 0.06169725f, 0.9943042f, 0.8000285f, 0.34662995f, 0.3909258f, 0.6586493f, 0.9920871f, 0.80688536f, 0.84350026f, 0.86506003f, 0.9833786f, 0.1113381f, 0.058909472f, 0.36759707f, 0.1351905f, 0.08711318f, 0.17150986f, 0.97114897f, 0.10649935f, 0.917866f, 0.56674695f, 0.99736273f, 0.6040517f, 0.92105764f, 0.38094944f, 0.48367384f, 0.14886507f, 0.380281f, 0.41597223f, 0.11372275f, 0.9531382f, 0.67997587f, 0.15792394f, 0.3364488f, 0.021841977f, 0.07619969f, 0.7798327f, 0.19889046f, 0.67756367f, 0.50971586f, 0.52456796f, 0.5036354f, 0.7753575f, 0.34809372f, 0.6398678f, 0.4031053f, 0.32557586f, 0.9053469f, 0.8064988f, 0.017155945f, 0.6316684f, 0.45066175f, 0.4873005f, 0.19287354f, 0.57614934f, 0.83062655f, 0.78713834f, 0.68235135f, 0.87318754f, 0.59281385f, 0.064060956f, 0.9382655f, 0.84566283f, 0.5540783f, 0.17840536f, 0.61837703f, 0.60292286f, 0.6568771f, 0.8471286f, 0.17995848f, 0.49391183f, 0.58517873f, 0.5330186f, 0.5795362f, 0.23409952f, 0.5289169f, 0.3746643f, 0.3180484f, 0.5622743f, 0.036257476f, 0.43180978f, 1.3171679E-4f, 0.63862574f, 0.5848303f, 0.94060403f, 0.5878032f, 0.6252845f, 0.18924952f, 0.39612424f, 0.7757128f, 0.9900665f, 0.86055374f, 0.18927997f, 0.84641314f, 0.8975901f, 0.89157784f, 0.57380813f, 0.94526875f, 0.501755f, 0.42647004f, 0.20386614f, 0.4966745f, 0.7561392f, 0.24496855f, 0.13073194f, 0.41784236f, 0.70873123f, 0.7233561f, 0.96866304f, 0.13634546f, 0.049341034f, 0.71949446f, 0.26208475f, 0.5635493f, 0.27563098f, 0.69374204f, 0.078678265f, 0.03588799f, 0.39408693f, 
0.7788656f, 0.94594073f, 0.92669946f, 0.41283527f, 0.62035376f, 0.281576f, 0.89905745f, 0.9558993f, 0.0892733f, 0.43785354f, 0.37643972f, 0.23148632f, 0.17041226f, 0.35524517f, 0.88507247f, 0.3892006f, 0.387216f, 0.15375885f, 0.21120822f, 0.24968858f, 0.44297022f, 0.2895735f, 0.15732966f, 0.07728944f, 0.71204036f, 0.6714093f, 0.053016555f, 0.75036585f, 0.23313028f, 0.56734544f, 0.7048986f, 0.8168968f, 0.06141414f, 0.35583347f, 0.07237186f, 0.12143032f, 0.83158904f, 0.6737841f, 0.53340894f, 0.13451897f, 0.24459034f, 0.96684134f, 0.30125558f, 0.39460337f, 0.07498105f, 0.6020688f, 0.11102765f, 0.3656724f, 0.4939227f, 0.21076858f, 0.13569292f, 0.6039172f, 0.08439329f, 0.30890274f, 0.22699659f, 0.64184964f, 0.2754223f, 0.7049345f, 0.63606584f, 0.9549267f, 0.80815446f, 0.17538197f, 0.05759198f, 0.43693244f, 0.26000643f, 0.6929544f, 0.7537442f, 0.61757445f, 0.19318241f, 0.034338124f, 0.8184448f, 0.92103f, 0.97425944f, 0.8894058f, 0.4300163f, 0.88676697f, 0.3483852f, 0.13178374f, 0.95866996f, 0.6248255f, 0.93648285f, 0.08839288f, 0.14454809f, 0.035382055f, 0.3209607f, 0.16345672f, 0.12934527f, 0.3662055f, 0.25347614f, 0.22039147f, 0.07854195f, 0.7695641f, 0.45950922f, 0.093585685f, 0.35322717f, 0.5360373f, 0.6071155f, 0.9050337f, 0.8356653f, 0.55022f, 0.8330065f, 0.92175573f, 0.93212676f, 0.79578835f, 0.44477537f, 0.14613354f, 0.6763672f, 0.27782786f, 0.9030046f, 0.8203768f, 0.6832867f, 0.24530792f, 0.7274624f, 0.3142183f, 0.022943567f, 238.253f, 220.45427f, 267.66333f, 238.0088f, 271.58243f, 273.22388f, 211.78992f, 289.42252f, 217.21829f, 208.85757f, 217.32358f, 207.44218f, 259.48422f, 208.71153f, 268.2896f, 297.33484f, 254.15167f, 232.80293f, 254.54332f, 232.60858f, 238.36755f, 270.21686f, 279.47226f, 282.7281f, 212.87875f, 212.81602f, 277.39685f, 293.25415f, 220.63031f, 259.65414f, 257.0341f, 286.7428f, 202.3495f, 251.0628f, 268.4925f, 237.58267f, 214.1937f, 219.69623f, 294.32617f, 293.98544f, 271.97043f, 277.1976f, 208.15645f, 285.3982f, 275.2406f, 253.17255f, 
280.30792f, 210.3171f, 262.86252f, 211.56f, 201.4514f, 237.41928f, 204.32811f, 291.4109f, 246.54733f, 278.7369f, 226.24847f, 262.70038f, 207.41508f, 274.15656f, 250.72443f, 259.09497f, 278.62515f, 298.87927f, 271.1042f, 265.95636f, 228.53195f, 264.95953f, 231.45522f, 238.10721f, 201.05338f, 299.04672f, 203.31392f, 280.5685f, 207.49594f, 288.41803f, 259.77884f, 289.5286f, 212.903f, 232.62526f, 273.2359f, 274.92944f, 228.19473f, 292.2021f, 244.35541f, 235.74893f, 281.4144f, 255.78027f, 261.2293f, 219.03902f, 240.27055f, 210.33026f, 250.7247f, 281.74927f, 296.55548f, 224.49033f, 224.96393f, 219.88365f, 294.07227f, 223.65594f, 273.98865f, 279.8825f, 262.97278f, 269.57916f, 284.82678f, 205.99402f, 230.71436f, 245.10574f, 291.90387f, 221.07706f, 285.6493f, 236.25264f, 225.34695f, 210.36287f, 288.40872f, 299.56335f, 259.16122f, 220.4013f, 235.9941f, 213.55952f, 286.5168f, 261.12793f, 230.74602f, 268.31143f, 226.09164f, 217.6272f, 203.38873f, 240.80707f, 255.07602f, 283.92712f, 218.6427f, 278.5974f, 272.98724f, 211.10165f, 230.14198f, 217.64426f, 228.90018f, 266.22888f, 227.51234f, 218.84616f, 247.46571f, 259.92053f, 212.12146f, 248.02554f, 236.08237f, 277.90137f, 263.06485f, 207.07365f, 275.89902f, 264.8849f, -9.9997225f, -9.9999695f, -9.999966f, -9.9999895f, -9.999834f, -9.999596f, -9.999333f, -9.999578f, -9.99955f, -9.999539f, -9.99926f, -9.999182f, -9.999128f, -9.999777f, -9.999337f, -9.999904f, -9.999079f, -9.99941f, -9.999122f, -9.999788f, -9.999136f, -9.9995165f, -9.999043f, -9.999407f, -9.999571f, -9.999437f, -9.999941f, -9.999134f, -9.999198f, -9.999579f, -9.999475f, -9.999036f, -9.999713f, -9.999731f, -9.999678f, -9.999174f, -9.999507f, -9.999201f, -9.999245f, -9.999307f, -9.999488f, -9.999016f, -9.999532f, -9.999287f, -9.999413f, -9.999584f, -9.99978f, -9.999425f, -9.999651f, -9.999136f, -9.999289f, -9.999958f, -9.9991665f, -9.99916f, -9.999886f, -9.999217f, -9.99971f, -9.999494f, -9.999177f, -9.999025f, -9.999024f, -9.999849f, -9.999718f, -9.99997f, -9.999352f, 
-9.999563f, -9.999284f, -9.999314f, -9.999419f, -9.999329f, -9.99949f, -9.9992075f, -9.999859f, -9.999224f, -9.999656f, -9.999043f, -9.99958f, -9.999525f, -9.999985f, -9.999004f, -9.999768f, -9.999181f, -9.999919f, -9.999416f, -9.999452f, -9.999608f, -9.999645f, -9.999955f, -9.999919f, -9.999946f, -9.999472f, -9.999145f, -9.999147f, -9.99935f, -9.999072f, -9.999628f, -9.999188f, -9.999702f, -9.999313f, -9.999205f, -9.999878f, -9.999991f, -9.999111f, -9.9991f, -9.999404f, -9.999437f, -9.999719f, -9.999646f, -9.999839f, -9.999222f, -9.999134f, -9.999098f, -9.999538f, -9.999294f, -9.999013f, -9.999872f, -9.99908f, -9.999922f, -9.999595f, -9.999158f, -9.999308f, -9.9995f, -9.99924f, -9.999744f, -9.999338f, -9.999049f, -9.999883f, -9.999513f, -9.999893f, -9.999218f, -9.999468f, -9.999204f, -9.999081f, -9.9994335f, -9.999555f, -9.999373f, -9.999073f, -9.999382f, -9.999415f, -9.999362f, -9.999137f, -9.999514f, -9.999781f, -9.999969f, -9.999229f, -9.999295f, -9.999149f, -9.999783f, -9.999437f, -9.999201f, 0.8368316f, 0.95952296f, 0.7187136f, 0.6472035f, 0.7200239f, 0.82257813f, 0.13384113f, 0.91812044f, 0.9440362f, 0.23334092f, 0.3562596f, 0.20390894f, 0.47781035f, 0.56394255f, 0.8770303f, 0.84794813f, 0.92716575f, 0.3591966f, 0.006163279f, 0.34427875f, 0.30020186f, 0.035439115f, 0.36127335f, 0.1666844f, 0.65421695f, 0.752802f, 0.8639191f, 0.7162624f, 0.10528788f, 0.3911885f, 0.6361361f, 0.33739233f, 0.45225555f, 0.04712947f, 0.9509385f, 0.08811871f, 0.6489793f, 0.563957f, 0.8571504f, 0.47839713f, 0.86719155f, 0.7297759f, 0.9265764f, 0.86381954f, 0.2705895f, 0.80873495f, 0.69725907f, 0.4615118f, 0.98845094f, 0.38829336f, 0.5021872f, 0.051559158f, 0.4416545f, 0.84030825f, 0.028471855f, 0.8019141f, 0.4764789f, 0.73308647f, 0.24829985f, 0.28266567f, 0.1642818f, 0.497284f, 0.9761126f, 0.8595787f, 0.61120987f, 0.48310366f, 0.45415315f, 0.4246855f, 0.35486698f, 0.4365935f, 0.6768876f, 0.36493155f, 0.96304077f, 0.49552417f, 0.8761381f, 0.7559321f, 0.46201146f, 0.50861555f, 
0.023068247f, 0.551351f, 0.45992744f, 0.069025f, 0.9549169f, 0.9121757f, 0.35455093f, 0.32405618f, 0.6669353f, 0.16085483f, 0.9973096f, 0.81469834f, 0.47871014f, 0.009814576f, 0.9915644f, 0.4212253f, 0.18318938f, 0.5728494f, 0.3666718f, 0.78813976f, 0.48231423f, 0.723981f, 0.7495278f, 0.7334672f, 0.31657055f, 0.29471073f, 0.2991272f, 0.17905454f, 0.25772056f, 0.04573023f, 0.9155821f, 0.9855648f, 0.9641909f, 0.49942952f, 0.32687747f, 0.3305897f, 0.5485675f, 0.6368628f, 0.09610839f, 0.91397697f, 0.99097943f, 0.7983881f, 0.7839146f, 0.13756526f, 0.058954984f, 0.2574425f, 0.7659589f, 0.8970627f, 0.8955351f, 0.24972673f, 0.3770009f, 0.5416225f, 0.42023486f, 0.4635182f, 0.040502504f, 0.20716274f, 0.08657944f, 0.13138548f, 0.8770457f, 0.6316995f, 0.0990857f, 0.732918f, 0.4953378f, 0.30765584f, 0.21265133f, 0.008900259f, 0.42015043f, 0.25701198f, 0.26232395f, 0.59503317f, 0.37619093f, 0.059471674f, 0.96380097f, 0.6594173f, 0.74392956f, 0.80542815f, 0.5856752f, 0.4709212f, 0.07911475f, 0.8975309f, 0.76675755f, 0.026576402f, 0.012588193f, 0.9571294f, 0.14971007f, 0.42658392f, 0.4339528f, 0.40636125f, 0.418213f, 0.19980216f, 0.8942122f, 0.995247f, 0.026640382f, 0.8785028f, 0.48940244f, 0.3919287f, 0.0862845f, 0.5089264f, 0.17742826f, 0.10345855f, 0.5513259f, 0.7041969f, 0.78375727f, 0.34573317f, 0.34970793f, 0.61609524f, 0.9967575f, 0.19738163f, 0.4390408f, 0.49108744f, 0.5759808f, 0.39300266f, 0.84470737f, 0.3280776f, 0.41459507f, 0.0031824266f, 0.3248213f, 0.21955715f, 0.8830681f, 0.6528493f, 0.7155801f, 0.18756945f, 0.038407642f, 0.048247315f, 0.06908089f, 0.96183145f, 0.8542427f, 0.45350936f, 0.3367257f, 0.26481515f, 0.06306089f, 0.3728015f, 0.4432045f, 0.7682931f, 0.34411287f, 0.018815735f, 0.60152483f, 0.06271082f, 0.30780053f, 0.15404528f, 0.777356f, 0.9382987f, 0.03425807f, 0.74410313f, 0.050881404f, 0.106018655f, 0.9237955f, 0.40959543f, 0.44272372f, 0.42992854f, 0.40163797f, 0.9774989f, 0.7284286f, 0.96605545f, 0.073630586f, 0.7020174f, 0.9556004f, 0.4899371f, 
0.2590087f, 0.7959899f, 0.8613244f, 0.7109668f, 0.68005985f, 0.18156524f, 0.68875915f, 0.89809185f, 0.26884466f, 0.46794668f, 0.78001046f, 0.6469185f, 0.03375709f, 0.83638656f, 0.19561735f, 0.72300714f, 0.4323585f, 0.6666231f, 0.6944045f, 0.5573255f, 0.94807935f, 0.40593168f, 0.16260563f, 0.2516181f, 0.5295202f, 0.8144355f, 0.63592476f, 0.40705463f, 0.41550696f, 0.046603993f, 0.23649848f, 0.72142303f, 0.86540526f, 0.9812862f, 0.12677868f, 0.7740198f, 0.028188271f, 0.05125889f, 0.25654867f, 0.7408246f, 0.9826668f, 0.75396377f, 0.6689209f, 0.8002577f, 0.3877432f, 0.83123654f, 0.5672896f, 0.8960579f, 0.39333224f, 0.14590047f, 0.7893236f, 0.38733613f, 0.77125305f, 0.9827144f, 0.014167471f, 0.49262884f, 0.21413602f, 0.67211145f, 0.27530655f, 0.76538646f, 0.5841506f, 0.9951677f, 0.29803824f, 0.024221342f, 0.6438744f, 0.43844396f, 0.35386777f, 0.39374486f, 0.9667755f, 0.26405483f, 0.29369798f, 6.263968E-5f, 0.40577433f, 0.014699541f, 0.8506516f, 0.82061505f, 0.04640132f, 0.38329712f, 0.23627418f, 0.01457501f, 0.920022f, 0.36586156f, 0.54100925f, 0.4094f, 0.9525085f, 0.7759392f, 0.38271114f, 0.9372709f, 0.4954011f, 0.90372294f, 0.5493134f, 0.79789823f, 0.215295f, 0.18560563f, 0.52747923f, 0.015467339f, 0.25793558f, 0.9574369f, 0.8208537f, 0.21616516f, 0.80089974f, 0.4464337f, 0.37760806f, 0.31725752f, 0.07363392f, 0.5414981f, 0.5969112f, 0.6802155f, 0.08681603f, 0.748899f, 0.8132425f, 0.6588185f, 0.7527277f, 0.22249526f, 0.48485887f, 0.52951264f, 0.9087715f, 0.0022171019f, 0.3312975f, 0.70355535f, 0.9905531f, 0.18766245f, 0.8428444f, 0.9489218f, 0.75968647f, 0.16918193f, 0.5090402f, 0.57815427f, 0.41849396f, 0.3353734f, 0.5701858f, 0.59971434f, 0.037876863f, 0.30670634f, 0.08724593f, 0.51724964f, 0.44608638f, 0.8887655f, 0.23586161f, 0.54564106f, 0.17055021f, 0.65770286f, 0.36355573f, 0.11598958f, 0.98736215f, 0.39781153f, 0.8273148f, 0.099607535f, 0.9095583f, 0.63183874f, 0.6119373f, 0.023166118f, 0.42524394f, 0.3938052f, 0.78907496f, 0.7087274f, 0.4950751f, 0.27278492f, 
0.36101273f, 0.9821936f, 0.7951266f, 0.8089244f, 0.7677898f, 0.506932f, 0.6540132f, 0.45168075f, 0.82436436f, 0.6100174f, 0.50495255f, 0.95378387f, 0.15670867f, 0.3659073f, 0.34792703f, 0.22730303f, 0.41741064f, 0.5464127f, 0.12390941f, 0.38427374f, 0.64032775f, 0.77376515f, 0.8658444f, 0.7240665f, 0.43486324f, 0.12049561f, 0.8539374f, 0.08333132f, 0.97497743f, 0.09330166f, 0.44820398f, 0.6796943f, 0.48456368f, 0.9055214f, 0.26348707f, 0.658894f, 0.0733997f, 0.1792219f, 0.54822993f, 0.08548857f, 0.6243975f, 0.14298357f, 0.034526028f, 0.094718255f, 0.039160337f, 0.24803995f, 0.7548811f, 0.81707966f, 0.55264014f, 0.4717769f, 0.8132233f, 0.08796681f, 0.46675965f, 0.21120757f, 0.84116185f, 0.02198596f, 233.08963f, 284.46478f, 228.92946f, 299.10284f, 252.34494f, 270.3675f, 247.62338f, 259.12375f, 293.7792f, 292.25543f, 287.2373f, 261.2933f, 234.23328f, 242.85649f, 246.06302f, 211.33946f, 262.4088f, 288.57184f, 280.21918f, 205.70305f, 216.75426f, 287.24652f, 233.86952f, 253.43048f, 228.54883f, 297.02246f, 219.41966f, 230.32181f, 211.07607f, 201.58842f, 255.04857f, 276.64703f, 226.55725f, 285.53146f, 230.61176f, 277.40143f, 217.56476f, 214.18044f, 253.52425f, 286.49228f, 280.64703f, 216.87614f, 229.96323f, 272.0548f, 287.85236f, 209.3926f, 271.86664f, 240.23541f, 299.9867f, 214.53423f, 273.7356f, 253.11342f, 205.02061f, 222.24791f, 242.70433f, 245.3724f, 298.40033f, 289.42432f, 282.7867f, 229.05533f, 289.985f, 271.32953f, 206.18881f, 285.04318f, 280.12766f, 215.771f, 233.6232f, 204.17224f, 242.84424f, 286.33337f, 254.11534f, 209.9334f, 243.23608f, 272.5159f, 205.16878f, 276.64346f, 244.62245f, 294.27008f, 290.36227f, 216.88017f, 298.44403f, 298.37915f, 214.64677f, 255.04266f, 280.10626f, 281.35904f, 236.9879f, 257.5684f, 280.48505f, 238.83212f, 253.65378f, 291.90552f, 228.50763f, 205.08888f, 281.95593f, 252.75293f, 290.4546f, 287.56818f, 210.91739f, 256.31198f, 232.79715f, 269.6927f, 235.58183f, 276.23233f, 227.1755f, 276.03674f, 292.6508f, 285.0999f, 287.64133f, 
234.23032f, 296.60068f, 277.18442f, 257.54352f, 254.5871f, 298.60168f, 202.64233f, 255.38023f, 248.32083f, 260.9433f, 205.4068f, 247.34087f, 208.5292f, 202.0934f, 216.09306f, 221.08582f, 257.41556f, 247.06735f, 266.92804f, 210.08488f, 249.02866f, 204.24144f, 263.3803f, 222.9913f, 251.80115f, 218.99036f, 290.71286f, 227.41696f, 204.93797f, 231.20157f, 292.14478f, 297.73837f, 280.12753f, 297.94702f, 228.16396f, 256.27838f, 280.33307f, 205.8249f, 279.23096f, 268.9643f, 231.75375f, -9.999341f, -9.999257f, -9.999949f, -9.999035f, -9.999831f, -9.99975f, -9.999811f, -9.999584f, -9.999827f, -9.999112f, -9.999565f, -9.999383f, -9.999329f, -9.999119f, -9.999867f, -9.999806f, -9.999535f, -9.99903f, -9.99938f, -9.9991255f, -9.999031f, -9.999938f, -9.999783f, -9.999634f, -9.999506f, -9.999364f, -9.999014f, -9.999437f, -9.999991f, -9.999617f, -9.999323f, -9.9991f, -9.999098f, -9.999426f, -9.999119f, -9.999553f, -9.9994545f, -9.999403f, -9.99964f, -9.999833f, -9.99963f, -9.999753f, -9.999862f, -9.999563f, -9.999861f, -9.999462f, -9.99921f, -9.99975f, -9.999412f, -9.99969f, -9.999759f, -9.999703f, -9.999666f, -9.999825f, -9.999146f, -9.999077f, -9.999142f, -9.999701f, -9.999502f, -9.999564f, -9.9995165f, -9.9997835f, -9.999195f, -9.999329f, -9.999829f, -9.999427f, -9.999484f, -9.999804f, -9.999084f, -9.999392f, -9.999105f, -9.999679f, -9.999752f, -9.999843f, -9.999609f, -9.999379f, -9.99906f, -9.999004f, -9.99919f, -9.9998665f, -9.999223f, -9.999334f, -9.999842f, -9.999544f, -9.999025f, -9.999718f, -9.999823f, -9.999554f, -9.99945f, -9.999082f, -9.999171f, -9.999058f, -9.999519f, -9.9995365f, -9.999272f, -9.999615f, -9.999609f, -9.999498f, -9.999642f, -9.999337f, -9.999279f, -9.999857f, -9.999663f, -9.999423f, -9.9990635f, -9.999101f, -9.9993f, -9.999743f, -9.999616f, -9.999779f, -9.99996f, -9.999366f, -9.999638f, -9.999791f, -9.999472f, -9.999714f, -9.999069f, -9.999222f, -9.999011f, -9.999037f, -9.999066f, -9.99982f, -9.999337f, -9.999344f, -9.9998455f, -9.999567f, -9.999952f, 
-9.9990635f, -9.9993515f, -9.999747f, -9.999756f, -9.999433f, -9.999954f, -9.999456f, -9.999391f, -9.999602f, -9.999213f, -9.999057f, -9.999885f, -9.999203f, -9.999455f, -9.999208f, -9.999754f, -9.99941f, -9.9997015f, -9.999528f, -9.999968f, -9.999105f, -9.999052f, -9.999117f, 0.07731749f, 0.9572599f, 0.2881733f, 0.34789458f, 0.12208096f, 0.3989875f, 0.23046659f, 0.07561615f, 0.7311842f, 0.24280672f, 0.13743502f, 0.32029906f, 0.26720718f, 0.6435275f, 0.71581525f, 0.25040102f, 0.07968058f, 0.9510946f, 0.16737682f, 0.5338542f, 0.96112233f, 0.12613547f, 0.71407163f, 0.017653665f, 0.5663055f, 0.9523341f, 0.66330385f, 0.43527827f, 0.21753095f, 0.6377421f, 0.0820664f, 0.5563942f, 0.105712675f, 0.06655064f, 0.8044171f, 0.6876928f, 0.97473025f, 0.47098678f, 0.23313597f, 0.46495864f, 0.13682419f, 0.19020991f, 0.6946199f, 0.58204114f, 0.008083445f, 0.21409632f, 0.90480167f, 0.06497669f, 0.3296087f, 0.51603156f, 0.49303642f, 0.3029305f, 0.5821996f, 0.5105462f, 0.51879376f, 0.108761f, 0.13990402f, 0.44722676f, 0.8695498f, 0.014239418f, 0.5745597f, 0.52994305f, 0.8318035f, 0.7634822f, 0.677615f, 0.09214777f, 0.705199f, 0.47799557f, 0.24047466f, 0.3105237f, 0.89669865f, 0.6427869f, 0.59037143f, 0.2127864f, 0.27039096f, 0.09363014f, 0.7930851f, 0.58145946f, 0.058050785f, 0.74635893f, 0.34254172f, 0.942883f, 0.8463423f, 0.49698228f, 0.1885729f, 0.2511439f, 0.87867934f, 0.028224535f, 0.7651291f, 0.49802932f, 0.21640365f, 0.69269353f, 0.25175697f, 0.76805496f, 0.75059545f, 0.05755356f, 0.7005975f, 0.9643457f, 0.59199476f, 0.15058741f, 0.8211338f, 0.50831884f, 0.9554822f, 0.10171006f, 0.5546305f, 0.28822696f, 0.8995881f, 0.96590596f, 0.76544195f, 0.23609895f, 0.5093231f, 0.29946357f, 0.44045478f, 0.5974459f, 0.24198511f, 0.13976322f, 0.30026865f, 0.6117198f, 0.54420567f, 0.83931947f, 0.9591503f, 0.055750016f, 0.015446019f, 0.34988365f, 0.6788849f, 0.8000394f, 0.34461623f, 0.8884854f, 0.11765242f, 0.6764313f, 0.70610297f, 0.7528662f, 0.6234379f, 0.95549244f, 0.48107228f, 0.57657474f, 
0.35293803f, 0.53558505f, 0.90731245f, 0.6388894f, 0.9061205f, 0.9068154f, 0.82560843f, 0.48359713f, 0.6093791f, 0.25128087f, 0.58313656f, 0.10119824f, 0.14279248f, 0.8000816f, 0.89156765f, 0.12725733f, 0.052655865f, 0.09217951f, 0.20653115f, 0.34572187f, 0.34771374f, 0.30589288f, 0.06053133f, 0.41077146f, 0.9258966f, 0.31344774f, 0.66711676f, 0.04113631f, 0.9229566f, 0.008368838f, 0.5903627f, 0.84122473f, 0.11545232f, 0.7868713f, 0.9680761f, 0.23150893f, 0.4704689f, 0.5499954f, 0.43753204f, 0.7121286f, 0.61013496f, 0.59720284f, 0.92617583f, 0.7834906f, 0.027650753f, 0.8977211f, 0.15754606f, 0.54239666f, 0.18633401f, 0.5662742f, 0.2190944f, 0.59521663f, 0.6435355f, 0.71627194f, 0.037149042f, 0.6100622f, 0.61836076f, 0.1470259f, 0.36966816f, 0.90360576f, 0.5119274f, 0.7205386f, 0.39034662f, 0.62984717f, 0.01017152f, 0.64599174f, 0.15090384f, 0.36933318f, 0.19484489f, 0.09027873f, 0.58042485f, 0.14514206f, 0.036732975f, 0.54077417f, 0.43008235f, 0.15875153f, 0.34932455f, 0.37410876f, 0.8042535f, 0.7739999f, 0.28807458f, 0.97715217f, 0.117083825f, 0.17992087f, 0.9757363f, 0.18320304f, 0.015741833f, 0.9748695f, 0.65635973f, 0.14705919f, 0.037058447f, 0.8968405f, 0.021620478f, 0.5633058f, 0.767505f, 0.12037435f, 0.44985265f, 0.26535192f, 0.22633725f, 0.5835013f, 0.42530164f, 0.6948082f, 0.7116804f, 0.6978424f, 0.82452023f, 0.23771845f, 0.99683344f, 0.70071405f, 0.12593275f, 0.7764756f, 0.36999762f, 0.3072223f, 0.09792935f, 0.43981078f, 0.8204207f, 0.14809668f, 0.7569628f, 0.8288626f, 0.15944423f, 0.21987063f, 0.5351478f, 0.11639127f, 0.9450276f, 0.657273f, 0.48179442f, 0.6428968f, 0.07266802f, 0.54417425f, 0.8990355f, 0.36724177f, 0.4083636f, 0.2944423f, 0.9782087f, 0.15691185f, 0.39151284f, 0.56013423f, 0.049810167f, 0.906521f, 0.9659634f, 0.921944f, 0.30070534f, 0.9883118f, 0.95775986f, 0.13003021f, 0.8573852f, 0.1918365f, 0.10604336f, 0.19914377f, 0.40675613f, 0.024324145f, 0.23431449f, 0.72297823f, 0.7580914f, 0.20346278f, 0.82810277f, 0.32680357f, 0.10711087f, 
0.590452f, 0.5469826f, 0.18557824f, 0.51672226f, 0.9832008f, 0.7936118f, 0.5308729f, 0.37090248f, 0.7742029f, 0.4481485f, 0.5493372f, 0.50338376f, 0.43103522f, 0.53751975f, 0.70061314f, 0.021088583f, 0.3308669f, 0.8162114f, 0.5326165f, 0.35944003f, 0.9206047f, 0.6406876f, 0.50699484f, 0.8470867f, 0.9593492f, 0.7875809f, 0.9962247f, 0.23328215f, 0.7006755f, 0.5442194f, 0.6375928f, 0.33889383f, 0.9687761f, 0.5783294f, 0.9320834f, 0.88320315f, 0.7495404f, 0.5102735f, 0.22573441f, 0.51124907f, 0.9721347f, 0.44289282f, 0.37883982f, 0.33592433f, 0.40807053f, 0.7348208f, 0.059953105f, 0.020652194f, 0.373106f, 0.35336265f, 0.029604226f, 0.6272284f, 0.6029403f, 0.49051753f, 0.398493f, 0.4539566f, 0.2655247f, 0.9981165f, 0.75446373f, 0.46822912f, 0.648188f, 0.324949f, 0.9306804f, 0.8809041f, 0.42844233f, 0.38464552f, 0.76389503f, 0.7626695f, 0.63432926f, 0.33961716f, 0.61165744f, 0.7148871f, 0.4873704f, 0.49829185f, 0.5820676f, 0.40672466f, 0.51494414f, 0.883497f, 0.78602934f, 0.24558222f, 0.5361903f, 0.69763577f, 0.26757947f, 0.4059913f, 0.862289f, 0.7588195f, 0.18907034f, 0.42610446f, 0.08498969f, 0.02107262f, 0.2888108f, 0.90481687f, 0.03300186f, 0.61184776f, 0.41099504f, 0.27365708f, 0.27691156f, 0.01747882f, 0.71713996f, 0.40858844f, 0.7091915f, 0.2785737f, 0.87971973f, 0.015822828f, 0.058852635f, 0.54861325f, 0.4243099f, 0.07972601f, 0.7242567f, 0.3915925f, 0.85279524f, 0.5510232f, 0.88121253f, 0.55209786f, 0.9690384f, 0.910818f, 0.4399193f, 0.08753263f, 0.25317103f, 0.28638893f, 0.08940263f, 0.62953717f, 0.13840295f, 0.6593923f, 0.27087918f, 0.54218894f, 0.7974436f, 0.03127277f, 0.13191597f, 0.3672008f, 0.45645824f, 0.50062525f, 0.59150535f, 0.53669804f, 0.87231857f, 0.083159134f, 0.30086067f, 0.57798487f, 0.6605887f, 0.46329933f, 0.7809135f, 0.3256513f, 0.42846498f, 0.43590286f, 0.7588255f, 0.112232044f, 0.45630154f, 0.85721415f, 0.36618492f, 0.3291177f, 0.3065707f, 0.258635f, 0.93674284f, 0.267144f, 0.94944286f, 0.03034833f, 0.43545058f, 277.44568f, 293.30225f, 
290.0967f, 226.36577f, 263.3507f, 233.65721f, 271.0456f, 201.33302f, 244.87222f, 248.06546f, 283.55505f, 273.16003f, 273.43265f, 248.35196f, 261.96664f, 252.17625f, 213.653f, 268.57755f, 241.37634f, 275.69666f, 231.28116f, 238.647f, 267.70135f, 270.0771f, 278.84747f, 232.92476f, 227.37221f, 290.46814f, 282.7081f, 210.15854f, 275.31555f, 260.04895f, 283.80142f, 227.62625f, 267.77484f, 245.33005f, 251.6941f, 232.47691f, 220.30089f, 292.46063f, 252.57907f, 262.54684f, 254.58533f, 239.21768f, 246.7902f, 254.07513f, 230.66675f, 288.9232f, 216.71547f, 214.78873f, 279.40067f, 210.46289f, 269.7311f, 258.03143f, 220.68816f, 220.33643f, 290.5327f, 217.04453f, 203.5228f, 236.82892f, 271.18365f, 253.44327f, 206.32324f, 243.99203f, 285.42123f, 208.0186f, 235.3223f, 215.7981f, 281.17578f, 258.11807f, 235.2606f, 226.48712f, 280.93256f, 280.83173f, 243.42778f, 266.36462f, 236.26477f, 295.47427f, 273.871f, 293.18738f, 276.67422f, 232.46318f, 218.5724f, 278.0185f, 260.68582f, 216.33072f, 202.01517f, 256.0112f, 260.35217f, 285.29895f, 282.32895f, 204.90137f, 202.91895f, 201.99902f, 234.42209f, 232.87006f, 296.0879f, 282.7151f, 260.2f, 263.00598f, 245.1402f, 220.98419f, 227.66153f, 298.27438f, 288.2768f, 246.6337f, 247.41647f, 229.84933f, 200.41792f, 256.62027f, 207.03185f, 235.04187f, 269.5741f, 279.07892f, 279.92096f, 266.31543f, 277.62415f, 282.93802f, 244.6243f, 261.97354f, 287.40088f, 285.73053f, 210.00949f, 235.31769f, 267.29855f, 256.89893f, 225.80467f, 241.72736f, 243.78555f, 230.197f, 220.44577f, 286.22617f, 295.29068f, 248.73352f, 271.84897f, 295.86597f, 274.50906f, 285.53323f, 254.3574f, 246.36845f, 232.46686f, 202.37822f, 232.31885f, 284.55515f, 281.44986f, 288.22656f, 224.62955f, 257.4739f, 277.62314f, 233.47943f, -9.999561f, -9.999684f, -9.999829f, -9.999858f, -9.999566f, -9.999728f, -9.999245f, -9.999897f, -9.999244f, -9.999921f, -9.999919f, -9.999612f, -9.999473f, -9.9995575f, -9.999303f, -9.999789f, -9.999555f, -9.999162f, -9.999468f, -9.999969f, -9.999672f, 
-9.999807f, -9.999847f, -9.99909f, -9.999817f, -9.999831f, -9.999489f, -9.999215f, -9.999848f, -9.9998455f, -9.999323f, -9.999817f, -9.999044f, -9.999408f, -9.999863f, -9.999365f, -9.99908f, -9.99931f, -9.99933f, -9.99975f, -9.999039f, -9.99978f, -9.999931f, -9.99974f, -9.999948f, -9.999952f, -9.999335f, -9.999389f, -9.999414f, -9.999315f, -9.999753f, -9.999389f, -9.99995f, -9.999082f, -9.999573f, -9.999592f, -9.9998f, -9.999939f, -9.999826f, -9.999052f, -9.99905f, -9.999516f, -9.999568f, -9.999664f, -9.999201f, -9.9993f, -9.999386f, -9.999858f, -9.999468f, -9.99966f, -9.999665f, -9.999242f, -9.9997425f, -9.99912f, -9.999361f, -9.999368f, -9.999324f, -9.999566f, -9.999074f, -9.99973f, -9.99977f, -9.999092f, -9.99947f, -9.999531f, -9.999189f, -9.99918f, -9.999814f, -9.999811f, -9.999523f, -9.999692f, -9.999746f, -9.999281f, -9.999508f, -9.999807f, -9.999763f, -9.999359f, -9.999442f, -9.999778f, -9.999925f, -9.999119f, -9.999002f, -9.999579f, -9.999089f, -9.999878f, -9.9991865f, -9.999503f, -9.99901f, -9.9991865f, -9.999055f, -9.999055f, -9.9990225f, -9.999116f, -9.999345f, -9.999241f, -9.999561f, -9.999711f, -9.999534f, -9.999722f, -9.999037f, -9.99902f, -9.999436f, -9.999547f, -9.9997425f, -9.999701f, -9.999172f, -9.99957f, -9.99917f, -9.999358f, -9.999515f, -9.9994545f, -9.999549f, -9.99922f, -9.999552f, -9.999457f, -9.999204f, -9.999363f, -9.99935f, -9.999776f, -9.999162f, -9.999254f, -9.99992f, -9.999504f, -9.9991f, -9.999846f, -9.99928f, -9.99955f, -9.999984f, -9.999683f, -9.999582f, -9.999975f, 0.4054413f, 0.49212277f, 0.9723238f, 0.72839403f, 0.6485173f, 0.11651259f, 0.10785521f, 0.032620244f, 0.023706913f, 0.3086147f, 0.47183102f, 0.992096f, 0.99172103f, 0.34033036f, 0.95944905f, 0.22414577f, 0.06989748f, 0.5614623f, 0.97281843f, 0.52306736f, 0.053522028f, 0.50254625f, 0.51301396f, 0.5985718f, 0.0371569f, 0.8265822f, 0.4661505f, 0.4922629f, 0.81253344f, 0.9696686f, 0.60658884f, 0.8239178f, 0.15269178f, 0.939187f, 0.14531301f, 0.37456673f, 0.779733f, 
0.418844f, 0.66610193f, 0.5676376f, 0.8005674f, 0.31309485f, 0.03271992f, 0.36289623f, 0.5230104f, 0.9365938f, 0.54856783f, 0.38090333f, 0.677641f, 0.98534113f, 0.6625885f, 0.9755095f, 0.078554325f, 0.018032718f, 0.8922824f, 0.9402988f, 0.7797243f, 0.5073222f, 0.8464975f, 0.7056091f, 0.49532133f, 0.42082825f, 0.39204183f, 0.7350382f, 0.7106082f, 0.7145868f, 0.7029236f, 0.22454071f, 0.9618653f, 0.4929038f, 0.58743435f, 0.22425091f, 0.52113986f, 0.29244232f, 0.58773226f, 0.17996566f, 0.16191864f, 0.8782989f, 0.6559272f, 0.45498922f, 0.109633766f, 0.29422963f, 0.28020766f, 0.45128867f, 0.34663188f, 0.011857478f, 0.13049418f, 0.39511293f, 0.15442526f, 0.98196644f, 0.74726933f, 0.20202826f, 0.066193216f, 0.6910641f, 0.91542566f, 0.36986846f, 0.36708114f, 0.7992493f, 0.66625875f, 0.9589232f, 0.58173925f, 0.2632916f, 0.8744973f, 0.869903f, 0.27612343f, 0.43633205f, 0.0069335676f, 0.46793646f, 0.6261623f, 0.8301051f, 0.4103617f, 0.583117f, 0.9595133f, 0.092884764f, 0.6108136f, 0.9563768f, 0.13297999f, 0.9781464f, 0.1866522f, 0.6501296f, 0.940671f, 0.5299086f, 0.9236821f, 0.8280376f, 0.5605807f, 0.08746594f, 0.99765533f, 0.9831952f, 0.3346039f, 0.45981014f, 0.16059282f, 0.898296f, 0.24069251f, 0.84168667f, 0.42612913f, 0.840821f, 0.06970532f, 0.6529262f, 0.21027155f, 0.6587761f, 0.8506848f, 0.23469605f, 0.8375965f, 0.6650027f, 0.6900568f, 0.03741631f, 0.90703416f, 0.60072684f, 0.041207824f, 0.20454895f, 0.13258597f, 0.38379464f, 0.5782676f, 0.37454012f, 0.788924f, 0.6553679f, 0.6696084f, 0.194304f, 0.18800853f, 0.42950943f, 0.70689565f, 0.837481f, 0.14751653f, 0.56871074f, 0.7577148f, 0.7652816f, 0.19738932f, 0.9059352f, 0.97273886f, 0.51461357f, 0.1711977f, 0.5120307f, 0.22731306f, 0.5407244f, 0.2804785f, 0.05774873f, 0.80988765f, 0.7796792f, 0.31191307f, 0.39822164f, 0.5347025f, 0.07349863f, 0.21531169f, 0.07873698f, 0.8192433f, 0.722044f, 0.40318736f, 0.8964449f, 0.49459186f, 0.9010825f, 0.45778024f, 0.80724466f, 0.38512704f, 0.38782215f, 0.13246128f, 0.7218372f, 
0.7401796f, 0.84869057f, 0.56868243f, 0.3278968f, 0.019229556f, 0.43221912f, 0.693255f, 0.43167397f, 0.78483266f, 0.09825686f, 0.5116548f, 0.1271103f, 0.18708695f, 0.95848906f, 0.23714672f, 0.52546054f, 0.5915945f, 0.7894098f, 0.8593355f, 0.31078282f, 0.28504592f, 0.85881007f, 0.29736793f, 0.50781727f, 0.65514153f, 0.44968098f, 0.9075563f, 0.7546295f, 0.45364478f, 0.29375777f, 0.94780463f, 0.6616151f, 0.01726944f, 0.9249832f, 0.9179415f, 0.6749661f, 0.43883613f, 0.37391648f, 0.65078586f, 0.21732111f, 0.02359236f, 0.007791354f, 0.30327088f, 0.31245363f, 0.84185934f, 0.49694976f, 0.93794364f, 0.8528437f, 0.7000397f, 0.5224565f, 0.8105422f, 0.99443287f, 0.847529f, 0.15470129f, 0.8077305f, 0.5341055f, 0.23147497f, 0.40932575f, 0.96443266f, 0.09061932f, 0.05683991f, 0.99754393f, 0.11661421f, 0.19272684f, 0.3620329f, 0.45262036f, 0.03901034f, 0.06041548f, 0.0075550857f, 0.27494353f, 0.67014945f, 0.2957977f, 0.2216069f, 0.6506188f, 0.45587075f, 0.28567624f, 0.5888963f, 0.98453754f, 0.8699843f, 0.9340606f, 0.0642961f, 0.14302005f, 0.7717978f, 0.75930613f, 0.6141049f, 0.4101332f, 0.27772737f, 0.28117037f, 0.8098905f, 0.5942f, 0.7786375f, 0.4493845f, 0.5141761f, 0.744234f, 0.34754843f, 0.9057713f, 0.29356617f, 0.41850287f, 0.25478244f, 0.78619635f, 0.70232016f, 0.7863453f, 0.57700616f, 0.3423882f, 0.11562478f, 0.6069529f, 0.7797115f, 0.2574891f, 0.51921356f, 0.2538803f, 0.670748f, 0.82137585f, 0.47364834f, 0.9369771f, 0.1801538f, 0.5134379f, 0.3520003f, 0.38112086f, 0.29870084f, 0.55816495f, 0.95891315f, 0.3729329f, 0.7877428f, 0.029987516f, 0.37669265f, 0.10563303f, 0.14064822f, 0.4556408f, 0.86550975f, 0.73312205f, 0.09095184f, 0.9431056f, 0.372078f, 0.4691022f, 0.72663444f, 0.5589779f, 0.98812455f, 0.1695335f, 0.8314304f, 0.7852622f, 0.61309403f, 0.10439321f, 0.76670945f, 0.5409888f, 0.9157445f, 0.57858527f, 0.14883776f, 0.20041484f, 0.30621874f, 0.9036323f, 0.9339205f, 0.9151604f, 0.12393201f, 0.929967f, 0.35930997f, 0.2358306f, 0.6697985f, 0.31414795f, 0.30049297f, 
0.89661825f, 0.27027792f, 0.17256655f, 0.9318595f, 0.81196785f, 0.38976404f, 0.293463f, 0.2512547f, 0.81138444f, 0.988779f, 0.27900514f, 0.4261041f, 0.61765677f, 0.8339683f, 0.25210267f, 0.51324797f, 0.92285997f, 0.0889822f, 0.5169889f, 0.3989031f, 0.6554801f, 0.9353766f, 0.544529f, 0.123369224f, 0.34246746f, 0.2115331f, 0.26744205f, 0.71749866f, 0.22343503f, 0.64539504f, 0.67429143f, 0.41868812f, 0.40186298f, 0.098477215f, 0.88132435f, 0.07625152f, 0.043012597f, 0.6452063f, 0.2102687f, 0.22173183f, 0.10345679f, 0.7434575f, 0.7126712f, 0.76721144f, 0.6512526f, 0.15990873f, 0.11895295f, 0.77731425f, 0.5243528f, 0.694658f, 0.86524415f, 0.75635976f, 0.057310082f, 0.16338252f, 0.78290933f, 0.7817539f, 0.8036517f, 0.33238873f, 0.676157f, 0.6762056f, 0.16322272f, 0.87960654f, 0.36118373f, 0.32454377f, 0.763408f, 0.506997f, 0.6956684f, 0.9279813f, 0.20323144f, 0.5839603f, 0.5633559f, 0.6701542f, 0.25721762f, 0.9896909f, 0.95511895f, 0.9082311f, 0.29406747f, 0.60026234f, 0.93644714f, 0.61788774f, 0.66341126f, 0.20749137f, 0.52809435f, 0.30916053f, 0.59821826f, 0.42163637f, 0.8293481f, 0.9711802f, 0.7839911f, 0.7657031f, 0.5351135f, 0.6362381f, 0.5429735f, 0.29129192f, 0.74155486f, 256.6196f, 299.92203f, 283.1842f, 257.95f, 242.67941f, 283.13525f, 297.3768f, 209.21597f, 298.94897f, 272.28577f, 208.13962f, 224.24684f, 215.7119f, 289.45593f, 248.60497f, 291.094f, 261.66168f, 291.05728f, 280.15112f, 246.94473f, 281.08008f, 221.38707f, 231.09238f, 220.10115f, 219.70961f, 273.52057f, 298.6576f, 250.59302f, 203.40039f, 227.90755f, 208.1463f, 211.84389f, 251.76518f, 275.46594f, 292.12732f, 277.5088f, 281.66544f, 274.27924f, 291.94995f, 282.94733f, 231.35228f, 229.87643f, 226.04532f, 246.81201f, 285.92133f, 211.72032f, 265.00046f, 292.0401f, 217.145f, 258.9742f, 241.07838f, 297.71396f, 265.03607f, 293.78973f, 215.46487f, 271.7528f, 297.20273f, 234.13841f, 253.58505f, 252.52872f, 224.75195f, 218.48878f, 204.55463f, 293.8269f, 283.58505f, 264.1618f, 226.64536f, 280.69232f, 218.0678f, 
219.11906f, 209.70735f, 215.2419f, 227.23471f, 226.22966f, 292.78833f, 250.87213f, 220.66672f, 292.0923f, 214.3262f, 220.62033f, 292.90533f, 294.61047f, 210.68884f, 260.9642f, 262.28113f, 255.0517f, 232.66026f, 294.8312f, 206.05696f, 289.73633f, 235.66345f, 232.93633f, 263.52408f, 256.7292f, 210.22684f, 229.51805f, 282.41776f, 211.0127f, 239.21553f, 235.43231f, 278.32697f, 299.7943f, 247.10483f, 219.1755f, 224.00432f, 263.2412f, 276.8183f, 291.88232f, 233.7261f, 241.75543f, 261.45193f, 296.58963f, 203.90746f, 277.9264f, 245.81134f, 261.24277f, 212.32646f, 242.76822f, 241.22888f, 224.0751f, 267.85315f, 232.49553f, 272.37656f, 253.20465f, 206.93951f, 201.29115f, 257.55444f, 296.3969f, 259.25177f, 292.10406f, 267.9734f, 253.28792f, 210.03741f, 272.03717f, 284.04358f, 292.52087f, 253.26274f, 207.37628f, 263.50598f, 228.07819f, 237.00746f, 241.3014f, 278.94174f, 214.41554f, 270.15442f, 264.77567f, 206.68633f, 229.17867f, 238.87085f, 254.12152f, -9.999742f, -9.999057f, -9.999062f, -9.999852f, -9.999382f, -9.999388f, -9.999354f, -9.999587f, -9.999273f, -9.999814f, -9.999888f, -9.999484f, -9.999295f, -9.999065f, -9.999623f, -9.999145f, -9.999381f, -9.999056f, -9.99943f, -9.999615f, -9.999143f, -9.999795f, -9.999838f, -9.999658f, -9.999616f, -9.9998f, -9.999448f, -9.999215f, -9.999058f, -9.999626f, -9.999816f, -9.99952f, -9.999158f, -9.999308f, -9.999545f, -9.999357f, -9.999205f, -9.999506f, -9.999683f, -9.999209f, -9.9999895f, -9.999543f, -9.999428f, -9.999628f, -9.999103f, -9.9991455f, -9.999936f, -9.999467f, -9.999748f, -9.99912f, -9.999807f, -9.999134f, -9.999681f, -9.999262f, -9.999087f, -9.999329f, -9.999385f, -9.999264f, -9.999793f, -9.999045f, -9.9995985f, -9.999204f, -9.999249f, -9.999444f, -9.9992075f, -9.9998455f, -9.999957f, -9.999949f, -9.999563f, -9.999786f, -9.999491f, -9.999651f, -9.999318f, -9.999416f, -9.999064f, -9.999325f, -9.9996f, -9.999902f, -9.999786f, -9.99952f, -9.999172f, -9.999215f, -9.999257f, -9.9991865f, -9.999605f, -9.999594f, -9.999224f, 
-9.999279f, -9.999259f, -9.999697f, -9.9996195f, -9.999134f, -9.999058f, -9.999047f, -9.999575f, -9.999919f, -9.999645f, -9.999633f, -9.999902f, -9.999141f, -9.999885f, -9.999965f, -9.999505f, -9.99982f, -9.999797f, -9.99964f, -9.999083f, -9.9995775f, -9.9999695f, -9.999383f, -9.999018f, -9.999117f, -9.99926f, -9.99911f, -9.999243f, -9.999118f, -9.99911f, -9.999486f, -9.99909f, -9.999861f, -9.999171f, -9.9999275f, -9.999972f, -9.999925f, -9.999671f, -9.999307f, -9.9994955f, -9.999324f, -9.999028f, -9.999182f, -9.999585f, -9.999082f, -9.999469f, -9.999043f, -9.999628f, -9.9994335f, -9.999068f, -9.999732f, -9.999809f, -9.999425f, -9.99959f, -9.999719f, -9.999516f, -9.999942f, -9.999832f, -9.999641f, -9.999447f, -9.99934f, -9.999968f, -9.999992f, 0.639171f, 0.47615534f, 0.1366003f, 0.4112621f, 0.543977f, 0.6301188f, 0.72094375f, 0.41664115f, 0.6702276f, 0.2662457f, 0.34709758f, 0.0047021024f, 0.19731691f, 0.3105783f, 0.35764986f, 0.6188618f, 0.55722684f, 0.014176953f, 0.28426266f, 0.55528253f, 0.9861382f, 0.59125423f, 0.91971123f, 0.50413203f, 0.71612626f, 0.37045076f, 0.16731057f, 0.8361767f, 0.20203081f, 0.46268502f, 0.54416966f, 0.82547253f, 0.70076334f, 0.19353609f, 0.7197332f, 0.7577992f, 0.15850778f, 0.09100532f, 0.8406752f, 0.4743588f, 0.14548168f, 0.91383964f, 0.31233132f, 0.057911392f, 0.38550714f, 0.788842f, 0.45663434f, 0.87255025f, 0.6822182f, 0.27235323f, 0.8781251f, 0.8971649f, 0.6117316f, 0.5027711f, 0.7707731f, 0.8171592f, 0.99433446f, 0.3228524f, 0.10424189f, 0.9995735f, 0.07680203f, 0.16278757f, 0.87946606f, 0.8840557f, 0.45882654f, 0.5382355f, 0.17185123f, 0.19348888f, 0.08070494f, 0.8351659f, 0.59116447f, 0.3656219f, 0.38914752f, 0.8038363f, 0.21394636f, 0.6494243f, 0.2923405f, 0.096409395f, 0.81489897f, 0.2177272f, 0.5156461f, 0.28180742f, 0.15846203f, 0.38402006f, 0.6799602f, 0.0992625f, 0.42167094f, 0.5157946f, 0.5737303f, 0.61967856f, 0.27188474f, 0.33863726f, 0.8381059f, 0.9284707f, 0.81110543f, 0.14615615f, 0.5137047f, 0.4068576f, 
0.27341366f, 0.6371842f, 0.46284974f, 0.6114867f, 0.71931726f, 0.91663635f, 0.60304374f, 0.14932536f, 0.88403726f, 0.54094154f, 0.1467738f, 0.97935086f, 0.7863954f, 0.2147064f, 0.012224621f, 0.14325804f, 0.65899223f, 0.5648787f, 0.65609366f, 0.8197612f, 0.6399177f, 0.8468733f, 0.76479703f, 0.25536442f, 0.5532024f, 0.95500815f, 0.39078063f, 0.5678974f, 0.21131837f, 0.987159f, 0.27899948f, 0.45318067f, 0.052973147f, 0.22060722f, 0.13576879f, 0.22578368f, 0.4504141f, 0.81624466f, 0.6962496f, 0.38475657f, 0.5542052f, 0.040127296f, 0.7824744f, 0.7515341f, 0.2940618f, 0.45921704f, 0.74931914f, 0.4590101f, 0.1761703f, 0.76585937f, 0.3804439f, 0.20216002f, 0.79364806f, 0.48445576f, 0.9997787f, 0.07572355f, 0.9185397f, 0.43292367f, 0.6824889f, 0.57344544f, 0.45387882f, 0.61218095f, 0.001530312f, 0.36701044f, 0.3732282f, 0.21642086f, 0.0032335173f, 0.9757738f, 0.6631197f, 0.84142756f, 0.23562978f, 0.8842848f, 0.24768245f, 0.6896844f, 0.093373105f, 0.47206926f, 0.018847544f, 0.3574926f, 0.7817249f, 0.3901984f, 0.37762666f, 0.60320383f, 0.5876514f, 0.8498338f, 0.6137263f, 0.64150596f, 0.8912183f, 0.18202206f, 0.07165835f, 0.54631984f, 0.14491297f, 0.46619728f, 0.5531275f, 0.9730491f, 0.3560192f, 0.5463067f, 0.9498098f, 0.6082786f, 0.12641688f, 0.27168056f, 0.449438f, 0.2710077f, 0.059393216f, 0.47376275f, 0.3349298f, 0.8534693f, 0.24378222f, 0.27263063f, 0.31725782f, 0.027660795f, 0.36858514f, 0.31543452f, 0.32232106f, 0.7514354f, 0.7665531f, 0.93814677f, 0.94667625f, 0.7495306f, 0.07630936f, 0.07085721f, 0.09998243f, 0.14326382f, 0.3722598f, 0.8195573f, 0.88503057f, 0.64455885f, 0.9708746f, 0.574863f, 0.7547003f, 0.663569f, 0.62627494f, 0.66573906f, 0.88241595f, 0.5472183f, 0.10965517f, 0.086363465f, 0.03911088f, 0.43472022f, 0.282755f, 0.81878805f, 0.7069662f, 0.6482738f, 0.7889657f, 0.13123439f, 0.5466046f, 0.9870477f, 0.65994346f, 0.044764873f, 0.2590037f, 0.21607089f, 0.7882748f, 0.030434562f, 0.7240241f, 0.24359426f, 0.24925096f, 0.50715107f, 0.8548116f, 0.5778587f, 
0.81658524f, 0.8406002f, 0.26860788f, 0.308281f, 0.40139812f, 0.27045614f, 0.681128f, 0.55732554f, 0.77117866f, 0.025454784f, 0.045293983f, 0.27430618f, 0.24866389f, 0.9072126f, 0.21633524f, 0.986974f, 0.91918707f, 0.86734384f, 0.5860722f, 0.8918684f, 0.86775124f, 0.24765202f, 0.7032609f, 0.4580694f, 0.6150063f, 0.12584582f, 0.13061108f, 0.11944151f, 0.27304602f, 0.08538959f, 0.2935459f, 0.6501564f, 0.6911091f, 0.79428184f, 0.19728307f, 0.9433592f, 0.98402375f, 0.278235f, 0.6931662f, 0.32246152f, 0.7604209f, 0.323686f, 0.4490462f, 0.21253695f, 0.37495488f, 0.095260054f, 0.5237899f, 0.9992169f, 0.36044437f, 0.5078252f, 0.5861082f, 0.64059675f, 0.03762793f, 0.49785113f, 0.38858363f, 0.69295675f, 0.2873984f, 0.32729995f, 0.59859157f, 0.73461634f, 0.25285175f, 0.5567667f, 0.71841735f, 0.69814867f, 0.77477485f, 0.16508374f, 0.15479185f, 0.48362815f, 0.37302348f, 0.7408702f, 0.11581469f, 0.08464117f, 0.029988535f, 0.34612563f, 0.45165575f, 0.68815565f, 0.008550999f, 0.09454897f, 0.8842033f, 0.471434f, 0.16433838f, 0.5935435f, 0.8646248f, 0.57239705f, 0.65469956f, 0.5863223f, 0.4796355f, 0.59167236f, 0.54985625f, 0.39255446f, 0.61727005f, 0.50840545f, 0.3316757f, 0.74857223f, 0.35827267f, 0.8872402f, 0.8038483f, 0.3931879f, 0.70447254f, 0.16417824f, 0.42719653f, 0.7534679f, 0.57123446f, 0.34724474f, 0.54931104f, 0.39288715f, 0.42828634f, 0.8222923f, 0.8765563f, 0.94212073f, 0.12068056f, 0.70422703f, 0.2824587f, 0.027603716f, 0.52777815f, 0.5066046f, 0.5769824f, 0.07630827f, 0.103958726f, 0.1505021f, 0.24175929f, 0.50438327f, 0.6733676f, 0.35198468f, 0.0752788f, 0.7415916f, 0.42589715f, 0.761479f, 0.0033971865f, 0.91897255f, 0.9319753f, 0.81370807f, 0.79544336f, 0.23588327f, 0.9587119f, 0.71191025f, 0.42136034f, 0.19574885f, 0.54185784f, 0.008105425f, 0.14255908f, 0.63592f, 0.3044852f, 0.6324764f, 0.6508548f, 0.08161495f, 0.65241224f, 0.8424147f, 0.97779244f, 0.72876996f, 0.61530423f, 0.94752645f, 0.6066642f, 0.10435986f, 0.18537253f, 0.30024627f, 0.8787194f, 0.06873524f, 
0.91032326f, 0.84761214f, 0.12825106f, 0.22760965f, 0.70036477f, 0.09428674f, 0.9861057f, 0.13853452f, 0.8474568f, 0.057899747f, 0.060172286f, 0.37916803f, 0.15240528f, 0.77621406f, 0.26485768f, 0.1740309f, 0.29064766f, 0.7386373f, 0.5348933f, 0.26158985f, 0.43255532f, 0.59368885f, 0.61983097f, 0.13413209f, 0.32573816f, 0.43871734f, 0.7316835f, 0.7375361f, 0.8791016f, 0.46889958f, 0.8362294f, 0.56079483f, 0.78738517f, 0.12909074f, 0.19669758f, 0.3654093f, 257.23004f, 205.25952f, 256.3495f, 287.5462f, 248.0553f, 279.42828f, 252.23164f, 293.8083f, 244.82593f, 241.14514f, 264.60312f, 242.02669f, 265.36676f, 285.9313f, 276.8894f, 264.85254f, 204.56178f, 216.75874f, 245.4952f, 212.06345f, 205.75478f, 284.3255f, 291.17203f, 219.69725f, 203.70792f, 225.91046f, 230.73822f, 262.73547f, 201.7526f, 212.36281f, 283.3116f, 294.07062f, 249.66954f, 283.85126f, 246.5827f, 207.68987f, 272.6758f, 240.09421f, 275.82172f, 225.84433f, 232.80176f, 201.71077f, 252.89136f, 240.62161f, 259.20868f, 247.87543f, 218.64772f, 248.03424f, 202.67117f, 238.984f, 290.77563f, 293.03915f, 289.35855f, 289.96945f, 286.17395f, 231.49643f, 251.10532f, 225.1938f, 206.88234f, 256.4651f, 239.51657f, 245.26834f, 247.59836f, 204.23398f, 203.37993f, 225.53943f, 267.85843f, 297.7295f, 265.553f, 295.24786f, 242.70523f, 286.44165f, 283.38336f, 251.81482f, 208.90456f, 257.36407f, 229.28513f, 290.7318f, 258.70337f, 223.44356f, 264.08783f, 275.03732f, 251.59811f, 292.53107f, 251.5335f, 244.22394f, 213.89952f, 236.25047f, 211.8138f, 220.5794f, 216.87543f, 233.37456f, 224.4222f, 295.09964f, 214.58566f, 281.3576f, 256.06107f, 241.79654f, 291.32068f, 239.49226f, 228.46638f, 218.16322f, 203.63048f, 299.67514f, 282.89703f, 265.6753f, 287.9343f, 239.81447f, 209.17609f, 262.6297f, 295.4711f, 205.0095f, 223.62189f, 286.34204f, 243.34543f, 237.4936f, 249.12177f, 232.68518f, 229.49867f, 224.16684f, 203.26491f, 272.76715f, 294.89102f, 286.48096f, 273.26846f, 273.41534f, 204.2877f, 210.98381f, 206.86124f, 265.20584f, 244.88943f, 
266.12534f, 239.2653f, 286.19138f, 271.75153f, 267.04507f, 210.73386f, 233.14261f, 220.80898f, 273.75244f, 298.48633f, 268.37622f, 204.67131f, 289.64368f, 276.43658f, 290.26245f, 279.004f, 201.35966f, 207.23166f, 280.78134f, -9.999485f, -9.999401f, -9.99988f, -9.99983f, -9.999996f, -9.999282f, -9.999148f, -9.999958f, -9.999139f, -9.999945f, -9.999827f, -9.999956f, -9.999576f, -9.999011f, -9.99982f, -9.999912f, -9.999579f, -9.9990425f, -9.999927f, -9.999287f, -9.999705f, -9.999723f, -9.999244f, -9.999403f, -9.999639f, -9.999259f, -9.999532f, -9.999533f, -9.999703f, -9.999582f, -9.999963f, -9.99968f, -9.999428f, -9.999266f, -9.999494f, -9.999798f, -9.999454f, -9.999226f, -9.99951f, -9.999481f, -9.999743f, -9.99988f, -9.999303f, -9.999975f, -9.999095f, -9.99945f, -9.999369f, -9.999166f, -9.99957f, -9.999976f, -9.999418f, -9.999267f, -9.99994f, -9.999312f, -9.999308f, -9.999992f, -9.9999f, -9.999182f, -9.9991665f, -9.999685f, -9.999133f, -9.999587f, -9.999473f, -9.999556f, -9.999567f, -9.999451f, -9.999944f, -9.999353f, -9.999919f, -9.999077f, -9.99981f, -9.999687f, -9.999805f, -9.999417f, -9.999404f, -9.999712f, -9.99989f, -9.999068f, -9.999573f, -9.999242f, -9.99952f, -9.999031f, -9.999762f, -9.999584f, -9.999476f, -9.999041f, -9.999508f, -9.999519f, -9.999463f, -9.999605f, -9.999481f, -9.99913f, -9.999719f, -9.99981f, -9.999058f, -9.99957f, -9.999909f, -9.99912f, -9.999596f, -9.999688f, -9.999179f, -9.999336f, -9.999998f, -9.999264f, -9.999145f, -9.99914f, -9.999104f, -9.999027f, -9.999755f, -9.999626f, -9.999572f, -9.999876f, -9.999124f, -9.9998865f, -9.999168f, -9.999185f, -9.9995575f, -9.999532f, -9.999246f, -9.999302f, -9.999073f, -9.999327f, -9.9998045f, -9.999645f, -9.999669f, -9.999047f, -9.999023f, -9.999354f, -9.999763f, -9.999772f, -9.999175f, -9.999568f, -9.999145f, -9.999254f, -9.999511f, -9.999705f, -9.999031f, -9.999324f, -9.999718f, -9.999497f, -9.99974f, -9.999597f, -9.999909f, -9.999239f, -9.999544f, -9.999691f, -9.999259f, -9.999239f, -9.999568f, 
-9.999504f, 0.03882216f, 0.8428897f, 0.74364215f, 0.23163715f, 0.49048677f, 0.22178552f, 0.6055793f, 0.4489804f, 0.9163623f, 0.9438124f, 0.1631071f, 0.6749212f, 0.7188561f, 0.32485962f, 0.8829685f, 0.20882395f, 0.60495543f, 0.47757575f, 0.6093003f, 0.84457403f, 0.7257506f, 0.17652789f, 0.025987253f, 0.9859064f, 0.6156289f, 0.73053515f, 0.76787066f, 0.5010675f, 0.40560544f, 0.07712759f, 0.9088255f, 0.07926025f, 0.24527292f, 0.27416497f, 0.74946845f, 0.24720564f, 0.07141664f, 0.43434754f, 0.4136174f, 0.869559f, 0.22436135f, 0.31195417f, 0.12554419f, 0.7383186f, 0.48795158f, 0.52957517f, 0.623028f, 0.036754537f, 0.56178623f, 0.32868809f, 0.9017316f, 0.09641818f, 0.9912348f, 0.92983764f, 0.4863829f, 0.2328445f, 0.72820157f, 0.5609035f, 0.5382467f, 0.21526214f, 0.2952519f, 0.391415f, 0.32775486f, 0.7910391f, 0.04752018f, 0.3907967f, 0.24044213f, 0.62969697f, 0.86658025f, 0.550671f, 0.6625566f, 0.7994618f, 0.12169334f, 0.21295948f, 0.4997118f, 0.98608136f, 0.67981267f, 0.5607458f, 0.20580857f, 0.59258527f, 0.74313295f, 0.504703f, 0.34825593f, 0.88810426f, 0.375232f, 0.9950801f, 0.6716571f, 0.43368435f, 0.13610889f, 0.7123607f, 0.5050985f, 0.31398848f, 0.6695705f, 0.12510324f, 0.18162547f, 0.61493284f, 0.816849f, 0.9648539f, 0.37662333f, 0.03039601f, 0.8444544f, 0.3708865f, 0.24754128f, 0.33466703f, 0.96997195f, 0.4863897f, 0.425792f, 0.5019443f, 0.3766153f, 0.37071276f, 0.30467907f, 0.5455875f, 0.47557223f, 0.99561185f, 0.82659286f, 0.50989014f, 0.8268076f, 0.32439554f, 0.90867627f, 0.523794f, 0.91507274f, 0.3708023f, 0.67873424f, 0.6258858f, 0.7507315f, 0.6253023f, 0.62942946f, 0.5893559f, 0.30942422f, 0.2114435f, 0.022920458f, 0.044418756f, 0.61610794f, 0.8113304f, 0.35662258f, 0.41705018f, 0.46921277f, 0.86777097f, 0.95223355f, 0.40362936f, 0.9437976f, 0.18228506f, 0.6360729f, 0.33576652f, 0.031274755f, 0.21817888f, 0.36112952f, 0.7787455f, 0.42273897f, 0.25281885f, 0.33198494f, 0.7785485f, 0.788286f, 0.16736427f, 0.0092501305f, 0.09297396f, 0.28935695f, 0.34107473f, 
0.30980217f, 0.53143716f, 0.52857065f, 0.8409118f, 0.4052178f, 0.69706166f, 0.64710814f, 0.026039753f, 0.98393834f, 0.37317148f, 0.2896904f, 0.9887286f, 0.26908764f, 0.9406588f, 0.5261725f, 0.9049269f, 0.56662345f, 0.6709716f, 0.68239623f, 0.49234113f, 0.97048306f, 0.33545634f, 0.23616292f, 0.21654218f, 0.25211942f, 0.024790008f, 0.6374578f, 0.38915554f, 0.9337675f, 0.9430794f, 0.4695175f, 0.7804938f, 0.536538f, 0.9851012f, 0.19607964f, 0.3125924f, 0.55515915f, 0.85639995f, 0.76419586f, 0.19247372f, 0.8593474f, 0.65614396f, 0.8763346f, 0.5008372f, 0.75938493f, 0.30444136f, 0.8475765f, 0.2756218f, 0.7643892f, 0.10603409f, 0.4270085f, 0.40084615f, 0.094159424f, 0.28666124f, 0.907423f, 0.59824944f, 0.13585345f, 0.7766466f, 0.8080405f, 0.6886941f, 0.019375224f, 0.8924157f, 0.8251331f, 0.78726494f, 0.91793686f, 0.30526364f, 0.75136036f, 0.5101915f, 0.0959181f, 0.64297056f, 0.16485944f, 0.7552983f, 0.5024531f, 0.29433584f, 0.99849665f, 0.4194633f, 0.3247048f, 0.6200598f, 0.10172686f, 0.5053654f, 0.2359409f, 0.7552459f, 0.8971784f, 0.044323962f, 0.52423203f, 0.67628855f, 0.36866117f, 0.99563f, 0.2329034f, 0.27227026f, 0.76375973f, 0.79602706f, 0.5184415f, 0.10457488f, 0.0819885f, 0.90606177f, 0.052181873f, 0.6621527f, 0.92458886f, 0.24737877f, 0.04191045f, 0.34999782f, 0.08424192f, 0.29925734f, 0.24015819f, 0.5147704f, 0.42221153f, 0.99205357f, 0.54271156f, 0.79544294f, 0.5694224f, 0.37800944f, 0.5500707f, 0.09987821f, 0.40123457f, 0.7795467f, 0.8094248f, 0.5604407f, 0.34524485f, 0.56357986f, 0.6901132f, 0.2526902f, 0.46615395f, 0.24697252f, 0.5420497f, 0.18665877f, 0.6566352f, 0.2777055f, 0.9320998f, 0.89702964f, 0.022678716f, 0.1815973f, 0.09005783f, 0.51381236f, 0.6743502f, 0.6247244f, 0.8565416f, 0.87987f, 0.6732118f, 0.00460204f, 0.27535322f, 0.7455861f, 0.15749842f, 0.9247148f, 0.03532768f, 0.08851064f, 0.23502532f, 0.752143f, 0.21853413f, 0.6609476f, 0.28531924f, 0.18054475f, 0.029035527f, 0.67236483f, 0.2241403f, 0.28975555f, 0.99908245f, 0.43963638f, 0.59023327f, 
0.30457687f, 0.16792373f, 0.7709499f, 0.6859642f, 0.69117963f, 0.86467695f, 0.5084144f, 0.7589203f, 0.4828981f, 0.07482473f, 0.48116097f, 0.53940266f, 0.5052822f, 0.22626108f, 0.7467059f, 0.41369334f, 0.031238595f, 0.028987564f, 0.66039693f, 0.22867519f, 0.8922084f, 0.23077016f, 0.49657655f, 0.12957393f, 0.5363605f, 0.4044849f, 0.44835f, 0.35317385f, 0.9867398f, 0.92447424f, 0.8969754f, 0.12785867f, 0.34567907f, 0.37078106f, 0.33044818f, 0.5057445f, 0.7683958f, 0.59161294f, 0.3239813f, 0.345188f, 0.5798496f, 0.64173394f, 0.8413601f, 0.47511417f, 0.835949f, 0.9396055f, 0.26686642f, 0.23109126f, 0.69826096f, 0.80957353f, 0.3445376f, 0.30203474f, 0.45118847f, 0.21602394f, 0.59850556f, 0.4789453f, 0.4077335f, 0.5152989f, 0.33034822f, 0.68474686f, 0.85391724f, 0.48057246f, 0.2998755f, 0.90360653f, 0.65591294f, 0.8092372f, 0.7287787f, 0.59123766f, 0.6105523f, 0.15701269f, 0.9201797f, 0.22071724f, 0.44657114f, 0.85324067f, 0.74536175f, 0.92492616f, 0.67641914f, 0.5987662f, 0.81729543f, 0.8069455f, 0.6891773f, 0.8835294f, 0.8892519f, 0.8500076f, 0.857101f, 0.6734726f, 0.9874815f, 0.46896955f, 0.9641137f, 0.47160545f, 0.8463774f, 0.30557284f, 0.9699319f, 0.06608189f, 0.055327572f, 0.93581414f, 0.9587841f, 0.058981307f, 0.92397076f, 0.010058546f, 0.34675553f, 0.6533823f, 0.5349482f, 0.46875533f, 0.5844002f, 0.5102338f, 0.26537207f, 0.19412437f, 0.07258324f, 0.38117927f, 0.1528994f, 0.056126937f, 0.7896892f, 0.3633707f, 0.5028834f, 0.15584666f, 0.43396717f, 0.7498128f, 0.17068368f, 0.8056127f, 0.83374524f, 0.7477155f, 0.8996221f, 0.53976667f, 0.9230572f, 0.19246647f, 0.6391656f, 0.4030687f, 0.7643678f, 0.019256072f, 0.59730285f, 0.309159f, 0.7264034f, 256.18292f, 247.5509f, 241.8322f, 221.72641f, 247.00475f, 289.95996f, 204.75641f, 299.0052f, 222.08545f, 249.15363f, 277.1748f, 222.7599f, 219.53043f, 259.93314f, 290.20483f, 264.3145f, 203.74707f, 269.35193f, 270.35507f, 233.42912f, 209.86781f, 292.96222f, 238.48882f, 256.7762f, 211.95813f, 255.83502f, 271.98605f, 276.92862f, 
244.43182f, 219.40994f, 250.76295f, 294.04694f, 226.60033f, 258.7823f, 224.29234f, 289.13776f, 284.96054f, 215.06387f, 284.33295f, 255.14339f, 249.39714f, 298.0097f, 206.93636f, 207.78658f, 210.90904f, 237.74179f, 227.25084f, 248.60242f, 241.76729f, 289.64044f, 257.6767f, 223.0866f, 249.12407f, 201.15231f, 275.7378f, 262.39612f, 268.82336f, 262.55298f, 269.66827f, 237.66492f, 211.21674f, 246.47617f, 200.1591f, 228.94618f, 286.93787f, 224.82498f, 282.6982f, 216.67554f, 299.76526f, 211.74054f, 258.6674f, 282.2848f, 242.32083f, 244.45291f, 261.59262f, 257.17282f, 230.43474f, 219.33755f, 239.1705f, 229.16939f, 229.4628f, 227.99637f, 278.22507f, 207.49443f, 232.81923f, 250.38698f, 255.53925f, 201.98932f, 279.6214f, 245.52f, 216.7771f, 238.63602f, 204.19614f, 258.92218f, 230.05328f, 267.0341f, 256.95154f, 293.94968f, 251.7791f, 249.71518f, 268.04617f, 243.68118f, 239.60608f, 291.69824f, 255.33287f, 247.66194f, 210.42975f, 272.79053f, 251.49638f, 270.4292f, 266.5404f, 223.91647f, 227.0489f, 217.59396f, 202.26263f, 234.13164f, 282.81702f, 241.44751f, 237.6629f, 254.03835f, 276.81006f, 253.21158f, 290.75342f, 299.60394f, 252.36249f, 207.7176f, 293.0687f, 224.40785f, 254.29674f, 210.75064f, 251.1633f, 265.51978f, 292.73917f, 268.97003f, 213.86755f, 280.26193f, 236.59819f, 261.9136f, 271.9696f, 260.67432f, 225.67659f, 279.94318f, 244.74088f, 205.70877f, 236.24387f, 266.11798f, 234.5054f, 227.88277f, 212.92162f, 281.1429f, -9.9995f, -9.999907f, -9.999015f, -9.99986f, -9.999811f, -9.99916f, -9.9994335f, -9.999082f, -9.999476f, -9.999472f, -9.999309f, -9.999354f, -9.999964f, -9.999819f, -9.999472f, -9.999187f, -9.999328f, -9.999281f, -9.999373f, -9.999825f, -9.999259f, -9.999581f, -9.999256f, -9.999902f, -9.999506f, -9.999213f, -9.999032f, -9.999097f, -9.999959f, -9.999018f, -9.999999f, -9.999964f, -9.99983f, -9.999462f, -9.999094f, -9.999825f, -9.999322f, -9.999475f, -9.999018f, -9.999352f, -9.999122f, -9.999426f, -9.999498f, -9.999934f, -9.9994545f, -9.99973f, -9.999741f, 
-9.999373f, -9.99933f, -9.999706f, -9.999398f, -9.999283f, -9.999558f, -9.999604f, -9.999935f, -9.999592f, -9.999328f, -9.999943f, -9.999334f, -9.99971f, -9.999961f, -9.999668f, -9.9997835f, -9.999137f, -9.999606f, -9.999959f, -9.99975f, -9.999391f, -9.999501f, -9.999959f, -9.999507f, -9.999104f, -9.999123f, -9.999664f, -9.99954f, -9.999395f, -9.99991f, -9.999099f, -9.999796f, -9.999523f, -9.999298f, -9.999127f, -9.99933f, -9.999529f, -9.999645f, -9.999581f, -9.999803f, -9.999978f, -9.999745f, -9.999099f, -9.999732f, -9.999282f, -9.999186f, -9.999484f, -9.9994545f, -9.999736f, -9.999692f, -9.999638f, -9.999521f, -9.999184f, -9.999315f, -9.999997f, -9.999688f, -9.999604f, -9.999361f, -9.999519f, -9.999438f, -9.999516f, -9.999867f, -9.999932f, -9.99967f, -9.999632f, -9.999027f, -9.999614f, -9.999386f, -9.999235f, -9.99902f, -9.999881f, -9.999402f, -9.999828f, -9.999898f, -9.999556f, -9.9999485f, -9.99902f, -9.999726f, -9.99967f, -9.999689f, -9.999588f, -9.999742f, -9.999436f, -9.999829f, -9.999895f, -9.999559f, -9.999202f, -9.999972f, -9.999332f, -9.999621f, -9.999881f, -9.999916f, -9.999846f, -9.999947f, -9.999159f, -9.999294f, -9.999025f, -9.999374f, -9.999594f, -9.999471f, -9.999263f, -9.999252f, -9.999847f, 0.8405395f, 0.4899531f, 0.15557215f, 0.053656846f, 0.9073092f, 0.07903749f, 0.49019513f, 0.46704555f, 0.2108235f, 0.59149706f, 0.06908697f, 0.91793466f, 0.19079898f, 0.54947394f, 0.052311927f, 0.77982026f, 0.5299146f, 0.17064495f, 0.56645525f, 0.8840749f, 0.042285662f, 0.8682272f, 0.028326662f, 0.09698481f, 0.12325795f, 0.4347101f, 0.37012324f, 0.7913993f, 0.9993339f, 0.75977063f, 0.36460763f, 0.3775515f, 0.51856863f, 0.95555836f, 0.49067768f, 0.04478922f, 0.71699315f, 0.097812556f, 0.45841676f, 0.773683f, 0.75010455f, 0.42993996f, 0.9079247f, 0.017453227f, 0.44864193f, 0.672689f, 0.28056568f, 0.19584337f, 0.37550166f, 0.8117075f, 0.7120219f, 0.5780687f, 0.44134927f, 0.42259568f, 0.7511653f, 0.5891905f, 0.67056227f, 0.11231151f, 0.6758219f, 0.22908887f, 
0.37498733f, 0.41971782f, 0.055803128f, 0.59144944f, 0.9299475f, 0.12942357f, 0.95274854f, 0.32053652f, 0.20608023f, 0.16834818f, 0.57836413f, 0.055714697f, 0.06392813f, 0.29768264f, 0.09972937f, 0.8983277f, 0.97463375f, 0.1341327f, 0.65210474f, 0.35204768f, 0.014110221f, 0.80327654f, 0.6689872f, 0.9037585f, 0.90981257f, 0.86295295f, 0.3795516f, 0.0062070885f, 0.5173644f, 0.20474744f, 0.86028427f, 0.15545785f, 0.3484738f, 0.48408556f, 0.28058404f, 0.75635433f, 0.5704764f, 0.80539626f, 0.8308685f, 0.7464902f, 0.12689869f, 0.89151156f, 0.37369293f, 0.36895418f, 0.5450234f, 0.1559311f, 0.2432725f, 0.38309494f, 0.27770162f, 0.56394845f, 0.72261786f, 0.5332152f, 0.49045795f, 0.88231075f, 0.6032768f, 0.6665413f, 0.857885f, 0.31463873f, 0.9153665f, 0.37640592f, 0.58912075f, 0.24793272f, 0.7373741f, 0.8440094f, 0.015947558f, 0.58805275f, 0.3667698f, 0.46238968f, 0.8334069f, 0.81946284f, 0.19397281f, 0.92121077f, 0.964989f, 0.24575949f, 0.0900369f, 0.6689977f, 0.23726216f, 0.601819f, 0.16691278f, 0.47163498f, 0.03375374f, 0.36948392f, 0.08575206f, 0.9858967f, 0.7306862f, 0.21772163f, 0.39309397f, 0.7458295f, 0.7629526f, 0.3144869f, 0.94122046f, 0.20584162f, 0.83637947f, 0.7726502f, 0.9049252f, 0.36524808f, 0.7137413f, 0.8284559f, 0.22519512f, 0.30139557f, 0.8169721f, 0.5312386f, 0.8956069f, 0.66213816f, 0.58457166f, 0.45457113f, 0.5169665f, 0.6269637f, 0.26091218f, 0.7560391f, 0.7980105f, 0.3960119f, 0.08781406f, 0.10958682f, 0.12124728f, 0.4373948f, 0.031676244f, 0.55287856f, 0.7805502f, 0.56280786f, 0.25152865f, 0.566051f, 0.7870067f, 0.759523f, 0.45281285f, 0.62631804f, 0.989187f, 0.26606834f, 0.39388546f, 0.87392044f, 0.583776f, 0.654467f, 0.49633527f, 0.39479604f, 0.63170516f, 0.62530655f, 0.9021866f, 0.13965032f, 0.35174674f, 0.79825306f, 0.7204604f, 0.8848764f, 0.43971986f, 0.7367297f, 0.71475625f, 0.07822404f, 0.42548487f, 0.11135407f, 0.80643165f, 0.83326644f, 0.8646103f, 0.89960915f, 0.46280593f, 0.8834037f, 0.2807901f, 0.68196964f, 0.3704893f, 0.4120405f, 
0.82667f, 0.02957211f, 0.16348517f, 0.528726f, 0.36919758f, 0.22145572f, 0.43879473f, 0.09656078f, 0.5824419f, 0.0181659f, 0.25570688f, 0.7642685f, 0.19078839f, 0.70748967f, 0.5835414f, 0.92161185f, 0.8213292f, 0.046582457f, 0.85949063f, 0.15103385f, 0.74723977f, 0.39284366f, 0.5726992f, 0.07368804f, 0.3426399f, 0.17463133f, 0.24858418f, 0.31684884f, 0.49405006f, 0.37952894f, 0.33315596f, 0.8640441f, 0.57182634f, 0.25183997f, 0.7026268f, 0.37704948f, 0.17044407f, 0.27955136f, 0.96993434f, 0.09108966f, 0.6897659f, 0.19774762f, 0.6693781f, 0.12952057f, 0.89581305f, 0.21900262f, 0.1147024f, 0.29112664f, 0.06916158f, 0.22942513f, 0.42038745f, 0.7651415f, 0.45440084f, 0.17078096f, 0.07726187f, 0.4274913f, 0.86462736f, 0.06414275f, 0.9592153f, 0.16050456f, 0.88035154f, 0.9545343f, 0.8513476f, 0.2491725f, 0.7261043f, 0.5407395f, 0.22621076f, 0.31755584f, 0.75632083f, 0.7962324f, 0.50990444f, 0.61564916f, 0.76425743f, 0.70222944f, 0.73869663f, 0.29614443f, 0.021682443f, 0.5887306f, 0.31215057f, 0.10243766f, 0.9339864f, 0.23341663f, 0.7255635f, 0.4185125f, 0.5641563f, 0.0210989f, 0.31937757f, 0.77237654f, 0.055116564f, 0.31758264f, 0.35916016f, 0.5235203f, 0.15846917f, 0.5410007f, 0.3291817f, 0.14069794f, 0.90887386f, 0.259237f, 0.93863297f, 0.75447625f, 0.6713672f, 0.5048135f, 0.7174148f, 0.52741486f, 0.92290014f, 0.0805213f, 0.70555705f, 0.8765804f, 0.21684085f, 0.059146658f, 0.52307314f, 0.24510364f, 0.73993003f, 0.081979565f, 0.76904917f, 0.57904243f, 0.4695278f, 0.016590666f, 0.7074726f, 0.03675281f, 0.05884536f, 0.8561499f, 0.7090553f, 0.86932564f, 0.31001756f, 0.7310781f, 0.7902563f, 0.4690628f, 0.5504265f, 0.99635744f, 0.8836126f, 0.49213162f, 0.4428661f, 0.88994193f, 0.35176337f, 0.4958119f, 0.5913544f, 0.4187957f, 0.27758822f, 0.28339785f, 0.7841562f, 0.30195132f, 0.752634f, 0.3137563f, 0.4315457f, 0.44653264f, 0.5451809f, 0.44049335f, 0.8987003f, 0.5640792f, 0.5874427f, 0.47600824f, 0.5928f, 0.80064255f, 0.20061128f, 0.37571868f, 0.8139443f, 0.62335235f, 
0.8047332f, 0.31274527f, 0.30714568f, 0.035397593f, 0.69739f, 0.2944578f, 0.34834376f, 0.5873635f, 0.9606469f, 0.5618423f, 0.6756651f, 0.03466902f, 0.27137738f, 0.59027666f, 0.8357776f, 0.425116f, 0.50365347f, 0.4515947f, 0.4932688f, 0.005631942f, 0.57952595f, 0.47525176f, 0.6249525f, 0.086651884f, 0.89189065f, 0.6617942f, 0.9442606f, 0.27843753f, 0.44292933f, 0.38660362f, 0.07765346f, 0.50435954f, 0.83211386f, 0.9370695f, 0.39374778f, 0.08252517f, 0.20432696f, 0.9130672f, 0.6829529f, 0.4023203f, 0.18018572f, 0.7534347f, 0.42706057f, 0.42672646f, 0.47151735f, 0.22955406f, 0.9152989f, 0.08499177f, 0.21106064f, 0.81278425f, 0.4464995f, 0.9721553f, 0.5701927f, 0.5504968f, 0.33792228f, 0.97337884f, 0.1806469f, 0.09640216f, 0.163271f, 0.42888898f, 0.778335f, 0.8884757f, 0.79867357f, 0.7878421f, 0.07889473f, 0.35902497f, 0.56884366f, 0.4541578f, 0.85038835f, 0.5382435f, 0.09464303f, 0.9107641f, 0.94099534f, 0.5400446f, 266.79602f, 274.32846f, 213.67004f, 233.85674f, 243.74121f, 250.29242f, 241.2762f, 246.10477f, 210.67426f, 209.43724f, 229.85814f, 280.7868f, 272.1595f, 250.896f, 203.6569f, 224.5947f, 228.5461f, 250.31659f, 259.0063f, 207.73958f, 214.5609f, 227.4157f, 288.49915f, 258.5862f, 237.1694f, 260.80396f, 253.53038f, 216.46973f, 200.73683f, 276.59747f, 218.64984f, 277.839f, 211.7889f, 278.14984f, 276.74042f, 224.4895f, 237.72171f, 253.24715f, 202.98746f, 237.59871f, 204.87325f, 239.43521f, 295.81796f, 299.5604f, 222.03635f, 228.79982f, 266.0576f, 239.92245f, 268.24426f, 238.24408f, 298.47308f, 288.47458f, 215.21046f, 248.30959f, 290.8601f, 287.38885f, 209.855f, 220.54123f, 251.46211f, 269.38593f, 215.89407f, 249.74835f, 233.35129f, 259.1078f, 247.44966f, 203.68665f, 295.11304f, 298.9008f, 216.80823f, 265.98523f, 250.68268f, 259.11737f, 224.44098f, 201.49985f, 265.72772f, 291.2741f, 291.02527f, 205.01653f, 225.3552f, 230.4449f, 205.90791f, 236.37225f, 234.94302f, 227.96848f, 293.9239f, 200.43617f, 261.1322f, 246.37569f, 206.33258f, 230.6332f, 275.16974f, 
226.53664f, 253.74765f, 201.92174f, 277.2812f, 279.80594f, 269.5651f, 215.83727f, 290.79214f, 209.25894f, 240.69214f, 259.45502f, 221.35303f, 245.88794f, 233.58676f, 278.87738f, 268.62115f, 238.47983f, 288.8792f, 284.89505f, 235.00497f, 242.7936f, 236.64014f, 252.04784f, 205.45514f, 290.40726f, 232.52823f, 259.1132f, 290.73474f, 227.57782f, 216.67067f, 294.74762f, 217.73929f, 209.24208f, 256.90912f, 240.18433f, 257.794f, 282.8988f, 208.77882f, 297.82245f, 299.72125f, 298.86118f, 282.77133f, 299.69577f, 298.43073f, 299.66992f, 206.1796f, 239.80862f, 245.31291f, 207.94046f, 256.93558f, 210.00853f, 297.19482f, 258.61487f, 298.00143f, 247.14326f, 220.11229f, 299.13562f, 289.7299f, 244.51624f, -9.999632f, -9.999593f, -9.999801f, -9.999819f, -9.999018f, -9.999244f, -9.999898f, -9.999155f, -9.999041f, -9.999333f, -9.999995f, -9.999601f, -9.999369f, -9.999678f, -9.99932f, -9.999411f, -9.999675f, -9.999204f, -9.999888f, -9.999743f, -9.999049f, -9.999095f, -9.9994955f, -9.999148f, -9.999902f, -9.999157f, -9.999642f, -9.999242f, -9.999449f, -9.99954f, -9.999594f, -9.999917f, -9.999246f, -9.999855f, -9.999591f, -9.999358f, -9.999842f, -9.999382f, -9.999745f, -9.999809f, -9.999109f, -9.999151f, -9.999462f, -9.999784f, -9.999753f, -9.999547f, -9.999858f, -9.999641f, -9.999331f, -9.999973f, -9.999725f, -9.999956f, -9.999523f, -9.999478f, -9.999359f, -9.999043f, -9.999455f, -9.999254f, -9.999494f, -9.999362f, -9.999646f, -9.999454f, -9.999153f, -9.99971f, -9.99948f, -9.999924f, -9.999973f, -9.9990425f, -9.999157f, -9.999034f, -9.999135f, -9.999451f, -9.99927f, -9.999871f, -9.999655f, -9.999354f, -9.999864f, -9.999408f, -9.999447f, -9.999032f, -9.999453f, -9.999718f, -9.999415f, -9.999358f, -9.999691f, -9.99945f, -9.999504f, -9.999244f, -9.999987f, -9.999557f, -9.999052f, -9.999141f, -9.999237f, -9.999049f, -9.99919f, -9.999888f, -9.999757f, -9.999621f, -9.999702f, -9.999411f, -9.999203f, -9.999174f, -9.999015f, -9.999339f, -9.999034f, -9.999728f, -9.99976f, -9.999317f, -9.999367f, 
-9.999866f, -9.999091f, -9.999755f, -9.999178f, -9.999553f, -9.999263f, -9.999655f, -9.999423f, -9.999304f, -9.999814f, -9.999966f, -9.999977f, -9.9992075f, -9.999666f, -9.999204f, -9.999895f, -9.999059f, -9.99907f, -9.9995575f, -9.999523f, -9.999056f, -9.999571f, -9.999786f, -9.999026f, -9.999145f, -9.999575f, -9.999738f, -9.99979f, -9.999363f, -9.999586f, -9.999727f, -9.999086f, -9.999402f, -9.999158f, -9.999252f, -9.999179f, -9.999597f, -9.999156f, -9.99936f, -9.999807f, -9.999261f, 0.5652288f, 0.9339315f, 0.55770487f, 0.7478212f, 0.33771703f, 0.28125492f, 0.51592994f, 0.5532214f, 0.58044416f, 0.66528046f, 0.669034f, 0.16671883f, 0.67413294f, 0.036051773f, 0.108843535f, 0.7993396f, 0.1639013f, 0.6568752f, 0.122072175f, 0.70342636f, 0.5444655f, 0.5812534f, 0.4522436f, 0.2419f, 0.07067616f, 0.8879451f, 0.60514754f, 0.14282055f, 0.70217454f, 0.10503953f, 0.39604086f, 0.60164565f, 0.5446685f, 0.07094606f, 0.5559759f, 0.014643576f, 0.9885768f, 0.45798954f, 0.80507016f, 0.46793476f, 0.91752577f, 0.04094297f, 0.60369307f, 0.8747373f, 0.5086575f, 0.7004933f, 0.2251465f, 0.35307238f, 0.27597564f, 0.94157344f, 0.65179616f, 0.20595148f, 0.27256346f, 0.20036213f, 0.67921185f, 0.15910614f, 0.52645075f, 0.6180527f, 0.09315563f, 0.4282912f, 0.3796773f, 0.55366653f, 0.8087156f, 0.989089f, 0.81570625f, 0.36953965f, 0.29338685f, 0.8806224f, 0.40907812f, 0.99581677f, 0.031810474f, 0.9831273f, 0.21194534f, 0.6745432f, 0.38136473f, 0.2702163f, 0.6385419f, 0.29438227f, 0.12847719f, 0.27120438f, 0.30660692f, 0.5424479f, 0.92706877f, 0.9079774f, 0.22223541f, 0.3657775f, 0.25447527f, 0.81911993f, 0.30269873f, 0.74017876f, 0.92759985f, 0.70151937f, 0.7640615f, 0.8949204f, 0.79928416f, 0.77783567f, 0.6940916f, 0.2910855f, 0.97654736f, 0.2973309f, 0.5588422f, 0.6462096f, 0.30760437f, 0.18172295f, 0.7695246f, 0.34731266f, 0.19734544f, 0.029608455f, 0.37696892f, 0.111436665f, 0.50183326f, 0.28445065f, 0.68564844f, 0.44779962f, 0.9736052f, 0.51790065f, 0.983022f, 0.52825344f, 0.41285545f, 
0.9967343f, 0.6162969f, 0.37753683f, 0.17138597f, 0.07175013f, 0.81368434f, 0.9612253f, 0.9045651f, 0.84745973f, 0.36729226f, 0.98037714f, 0.20115525f, 0.12099608f, 0.96984464f, 0.37242016f, 0.29363927f, 0.39158085f, 0.27558497f, 0.66305256f, 0.10113714f, 0.76193494f, 0.45118755f, 0.4488773f, 0.93012637f, 0.31139725f, 0.0031577414f, 0.22718209f, 0.29718128f, 0.71752393f, 0.14526285f, 0.18364605f, 0.37547293f, 0.9685261f, 0.9378056f, 0.27025697f, 0.8536382f, 0.40919214f, 0.6247997f, 0.020774715f, 0.2789666f, 0.6214883f, 0.28909984f, 0.4459083f, 0.22759606f, 0.16503142f, 0.12913509f, 0.76620036f, 0.31722352f, 0.31122422f, 0.14058389f, 0.3711774f, 0.2540991f, 0.92829734f, 0.31982893f, 0.58990836f, 0.7611616f, 0.94479626f, 0.77106464f, 0.98198724f, 0.045493614f, 0.5808194f, 0.044766188f, 0.028754123f, 0.6398209f, 0.5149536f, 0.6159741f, 0.38356403f, 0.3443942f, 0.8204024f, 0.16429621f, 0.45349202f, 0.9345274f, 0.6689286f, 0.46520096f, 0.5479114f, 0.50660115f, 0.030693837f, 0.14807424f, 0.0025167174f, 0.04072329f, 0.06662837f, 0.19923986f, 0.31228405f, 0.26450446f, 0.5282875f, 0.32404247f, 0.3938328f, 0.028723368f, 0.53065664f, 0.84379214f, 0.84157664f, 0.37586623f, 0.15792112f, 0.20647834f, 0.024251468f, 0.3573017f, 0.37901312f, 0.6181092f, 0.76309824f, 0.7608666f, 0.3481646f, 0.34048688f, 0.47856995f, 0.31012326f, 0.23520178f, 0.45539266f, 0.92912894f, 0.4204687f, 0.92543155f, 0.5307048f, 0.27608588f, 0.7496653f, 0.6049889f, 0.36525294f, 0.14689086f, 0.51323116f, 0.12193437f, 0.59619224f, 0.60478336f, 0.9294276f, 0.249309f, 0.74476606f, 0.92789376f, 0.043751504f, 0.5309229f, 0.3062958f, 0.31674966f, 0.14777556f, 0.52924913f, 0.9668007f, 0.20873389f, 0.3279674f, 0.7965414f, 0.37618962f, 0.89503884f, 0.46796778f, 0.0799155f, 0.13676843f, 0.99596673f, 0.5959752f, 0.82745814f, 0.19763403f, 0.45169583f, 0.034008075f, 0.51954156f, 0.5263711f, 0.32014525f, 0.053273566f, 0.81357837f, 0.97085255f, 0.07153194f, 0.9582462f, 0.64213526f, 0.32651472f, 0.60837305f, 0.9404863f, 
0.06993771f, 0.7587776f, 0.7886673f, 0.41194588f, 0.78207874f, 0.7781359f, 0.3276002f, 0.33506534f, 0.28078383f, 0.12973906f, 0.399713f, 0.62760603f, 0.75171447f, 0.80802286f, 0.5050624f, 0.33723688f, 0.23653711f, 0.22387893f, 0.3570362f, 0.05210913f, 0.8889524f, 0.49352857f, 0.4521699f, 0.9740411f, 0.7144635f, 0.4756838f, 0.331589f, 0.068503655f, 0.97924995f, 0.41867498f, 0.31639704f, 0.7069934f, 0.81501675f, 0.5386601f, 0.4093507f, 0.707298f, 0.9774356f, 0.72752196f, 0.1570271f, 0.9423814f, 0.9732382f, 0.71725017f, 0.3946321f, 0.62860346f, 0.06245658f, 0.90315664f, 0.5143768f, 0.8708286f, 0.84123635f, 0.92691624f, 0.639396f, 0.2552601f, 0.37173754f, 0.7914776f, 0.91429204f, 0.4736561f, 0.15064463f, 0.7540974f, 0.2862515f, 0.48185065f, 0.13227704f, 0.32188603f, 0.63464296f, 0.8106472f, 0.94166034f, 0.17569262f, 0.19304337f, 0.29407963f, 0.587708f, 0.97985137f, 0.93614686f, 0.8405717f, 0.02620014f, 0.35624048f, 0.59463245f, 0.011628275f, 0.66693187f, 0.74045765f, 0.8160365f, 0.84104806f, 0.88261247f, 0.0711487f, 0.8989867f, 0.97475845f, 0.4168518f, 0.13669337f, 0.28926903f, 0.49182004f, 0.41090083f, 0.276433f, 0.09197279f, 0.68734396f, 0.3883402f, 0.90047145f, 0.11048286f, 0.15737055f, 0.21775864f, 0.9536175f, 0.076466806f, 0.24726667f, 0.103641525f, 0.0413075f, 0.27288043f, 0.3405656f, 0.14998767f, 0.51837134f, 0.16329993f, 0.3755023f, 0.9497281f, 0.8958037f, 0.98416775f, 0.34084278f, 0.18396701f, 0.8870497f, 0.11773594f, 0.7778607f, 0.5278507f, 0.9345038f, 0.12104616f, 0.3192234f, 0.026860172f, 0.71437854f, 0.8270822f, 0.34825006f, 0.39791596f, 0.62681943f, 0.27854878f, 0.519083f, 0.9585388f, 0.9732782f, 0.24999642f, 0.18574189f, 0.92319125f, 0.2299785f, 0.78481007f, 0.4593966f, 0.18952563f, 0.4418934f, 0.75275475f, 0.47553676f, 0.47977385f, 0.516905f, 0.6218342f, 0.986334f, 0.6328223f, 0.87600803f, 0.23837951f, 0.29930744f, 0.5477805f, 0.17647119f, 0.3403492f, 0.79772884f, 0.12769036f, 0.8723695f, 0.1560829f, 0.75527936f, 0.41855234f, 0.66972154f, 0.3795148f, 
0.75438255f, 0.45185962f, 0.64733654f, 0.83693033f, 0.7853063f, 0.52869916f, 0.44457012f, 0.031068115f, 0.995698f, 0.86542577f, 0.29396066f, 0.3056323f, 0.7761462f, 0.5815433f, 0.4590591f, 0.6379277f, 203.08049f, 242.811f, 200.0787f, 248.54701f, 240.53275f, 206.88977f, 264.96545f, 215.722f, 207.14218f, 248.2029f, 260.38293f, 246.59158f, 255.92654f, 290.20236f, 282.13013f, 255.587f, 289.51746f, 250.55061f, 256.14774f, 212.82437f, 283.77695f, 234.53087f, 295.53558f, 263.51688f, 262.4394f, 295.93118f, 249.12567f, 230.53714f, 244.58417f, 212.62454f, 222.62276f, 202.04688f, 220.03893f, 219.85342f, 298.00995f, 225.98215f, 237.55687f, 233.73161f, 277.78552f, 292.03333f, 241.16255f, 239.44547f, 269.768f, 208.34856f, 223.83221f, 247.22945f, 220.80157f, 225.7253f, 267.53107f, 219.36331f, 263.37506f, 292.40854f, 238.76868f, 248.44582f, 284.12405f, 266.40955f, 297.5755f, 221.04996f, 205.62082f, 256.34137f, 216.44402f, 236.91107f, 213.73282f, 215.86444f, 256.87595f, 251.31393f, 216.1751f, 265.14798f, 213.08633f, 254.30765f, 244.74179f, 278.06122f, 262.01956f, 248.49234f, 205.56573f, 285.15247f, 291.18823f, 246.23334f, 286.69305f, 297.73892f, 222.13132f, 274.70645f, 272.9896f, 218.96129f, 263.71072f, 289.10516f, 210.93655f, 235.38228f, 240.58383f, 289.90942f, 238.94185f, 276.05884f, 239.10864f, 254.86401f, 282.10757f, 204.39113f, 238.20418f, 291.72028f, 279.3937f, 255.42195f, 223.81288f, 201.32336f, 262.53845f, 218.35716f, 291.38098f, 248.38783f, 276.37997f, 251.07683f, 295.05258f, 210.5348f, 252.41638f, 265.33124f, 294.82996f, 279.9688f, 295.2437f, 275.68787f, 202.7976f, 207.2586f, 262.63266f, 295.0467f, 288.30432f, 231.05023f, 298.57654f, 286.71002f, 222.34149f, 209.956f, 297.5865f, 204.87299f, 243.4733f, 242.39302f, 209.53899f, 221.00655f, 211.91463f, 266.0036f, 223.22115f, 266.37555f, 278.43994f, 214.11813f, 254.79947f, 234.70715f, 294.82663f, 267.89825f, 282.26373f, 285.57803f, 216.04143f, 222.16176f, 264.46344f, 216.57985f, 208.0961f, 251.9738f, -9.999269f, -9.999741f, 
-9.999561f, -9.999911f, -9.999339f, -9.999749f, -9.999292f, -9.999522f, -9.999454f, -9.9992895f, -9.999531f, -9.99933f, -9.999341f, -9.99938f, -9.999905f, -9.999054f, -9.999979f, -9.999243f, -9.999734f, -9.999235f, -9.999104f, -9.999684f, -9.999259f, -9.999619f, -9.999497f, -9.999474f, -9.999353f, -9.999263f, -9.999088f, -9.999558f, -9.999322f, -9.999186f, -9.9993925f, -9.9999075f, -9.999958f, -9.999795f, -9.999834f, -9.999768f, -9.999121f, -9.999825f, -9.999527f, -9.999656f, -9.999941f, -9.999142f, -9.999984f, -9.999141f, -9.999887f, -9.9990835f, -9.999148f, -9.9991665f, -9.999867f, -9.999421f, -9.999081f, -9.999978f, -9.999075f, -9.999531f, -9.999142f, -9.999553f, -9.999812f, -9.999398f, -9.999295f, -9.9992285f, -9.999865f, -9.999482f, -9.999524f, -9.999773f, -9.999741f, -9.999358f, -9.999916f, -9.999248f, -9.999274f, -9.999893f, -9.999962f, -9.999569f, -9.9997225f, -9.999103f, -9.999036f, -9.999721f, -9.999645f, -9.999536f, -9.999113f, -9.9998455f, -9.999898f, -9.999262f, -9.999967f, -9.999528f, -9.9996195f, -9.999813f, -9.99977f, -9.999597f, -9.999661f, -9.999434f, -9.999925f, -9.999199f, -9.999759f, -9.999627f, -9.999813f, -9.999361f, -9.999325f, -9.999499f, -9.999843f, -9.999769f, -9.999987f, -9.999241f, -9.999264f, -9.999075f, -9.9998665f, -9.99927f, -9.999766f, -9.999045f, -9.999036f, -9.999232f, -9.999256f, -9.999415f, -9.999601f, -9.999707f, -9.999876f, -9.999688f, -9.999064f, -9.999532f, -9.99921f, -9.99905f, -9.999712f, -9.999656f, -9.999218f, -9.999016f, -9.999569f, -9.999398f, -9.999709f, -9.999183f, -9.999058f, -9.999427f, -9.999155f, -9.999367f, -9.999406f, -9.99968f, -9.999578f, -9.999454f, -9.999143f, -9.999611f, -9.999365f, -9.999709f, -9.9992285f, -9.9998255f, -9.999111f, -9.999831f, -9.999511f, -9.999469f, -9.99995f, -9.999711f, 0.5344577f, 0.28066808f, 0.56196564f, 0.5902792f, 0.8473387f, 0.24633567f, 0.92718124f, 0.17364842f, 0.31536132f, 0.22439669f, 0.46772173f, 0.23150134f, 0.13030241f, 0.7544915f, 0.32698f, 0.59160626f, 0.5460109f, 
0.84683007f, 0.23899049f, 0.8182671f, 0.7197824f, 0.8125036f, 0.8256115f, 0.40416914f, 0.66582596f, 0.0867179f, 0.0084044915f, 0.49205506f, 0.721172f, 0.40177187f, 0.29393357f, 0.015860511f, 0.93151456f, 0.4811004f, 0.54983306f, 0.9995074f, 0.27758396f, 0.22854643f, 0.5583765f, 0.6666239f, 0.85158247f, 0.21441942f, 0.6990569f, 0.017201606f, 0.530989f, 0.21839866f, 0.08578203f, 0.10198945f, 0.039713096f, 0.7290501f, 0.6342606f, 0.51234406f, 0.12498403f, 0.25547478f, 0.8394662f, 0.8280061f, 0.81155413f, 0.012060473f, 0.057682104f, 0.7739566f, 0.08708117f, 0.5193988f, 0.8415829f, 0.7520876f, 0.007182941f, 0.7731886f, 0.33688733f, 0.19361727f, 0.84651196f, 0.22044875f, 0.54851544f, 0.6421493f, 0.58298194f, 0.6989305f, 0.4031829f, 0.41380137f, 0.20955233f, 0.47619122f, 0.65416205f, 0.44766036f, 0.7429968f, 0.47871348f, 0.36874366f, 0.76017255f, 0.63620025f, 0.6808348f, 0.8399061f, 0.72613007f, 0.97575134f, 0.4643534f, 0.7247778f, 0.04549828f, 0.5940095f, 0.5128606f, 0.5878437f, 0.46860144f, 0.6618377f, 0.83293724f, 0.26350665f, 0.24366878f, 0.7788333f, 0.74646133f, 0.5429722f, 0.26375026f, 0.3656472f, 0.12205635f, 0.7138406f, 0.7608406f, 0.60281974f, 0.33415812f, 0.16791728f, 0.68858635f, 0.4469567f, 0.04436514f, 0.5672564f, 0.89869404f, 0.6294232f, 0.9793584f, 0.092907295f, 0.51271373f, 0.3846658f, 0.79488826f, 0.30746242f, 0.9191275f, 0.9108379f, 0.78182805f, 0.97138745f, 0.9847524f, 0.8531674f, 0.022702204f, 0.621023f, 0.7043253f, 0.22311302f, 0.6966194f, 0.36192545f, 0.8646154f, 0.94498384f, 0.8819606f, 0.39050183f, 0.66352f, 0.9537454f, 0.9776376f, 0.07475392f, 0.14165574f, 0.9068708f, 0.07851684f, 0.098995164f, 0.4659044f, 0.94835365f, 0.8669782f, 0.47114196f, 0.24303971f, 0.36649755f, 0.38048944f, 0.3541504f, 0.3041829f, 0.04842617f, 0.5725111f, 0.68421566f, 0.18098183f, 0.96466625f, 0.32582006f, 0.47631285f, 0.17308696f, 0.5422008f, 0.43860963f, 0.94000804f, 0.90531296f, 0.24555893f, 0.15075591f, 0.8892247f, 0.80251575f, 0.43217945f, 0.5427292f, 0.58730876f, 
0.9010511f, 0.75740033f, 0.16942962f, 0.77507013f, 0.7471421f, 0.18903506f, 0.96626693f, 0.43212372f, 0.9690648f, 0.31306309f, 0.62832534f, 0.7866172f, 0.79370797f, 0.32908842f, 0.5066318f, 0.34556115f, 0.1002444f, 0.90521127f, 0.3832993f, 0.3292787f, 0.9103993f, 0.17307699f, 0.36895168f, 0.7688117f, 0.7769159f, 0.7559714f, 0.7624208f, 0.4072027f, 0.6700012f, 0.10266004f, 0.46105045f, 0.8847699f, 0.3703581f, 0.79471564f, 0.18433845f, 0.26636884f, 0.5759068f, 0.025358567f, 0.6020128f, 0.85619676f, 0.77020776f, 0.8782154f, 0.605358f, 0.82230324f, 0.3943509f, 0.10723012f, 0.23251477f, 0.41980323f, 0.44982743f, 0.3976f, 0.24261324f, 0.09185766f, 0.9083403f, 0.8951799f, 0.93775445f, 0.4116088f, 0.8328249f, 0.060170095f, 0.23731631f, 0.043149915f, 0.8760627f, 0.9832404f, 0.8160704f, 0.35087004f, 0.99301636f, 0.58498734f, 0.31982517f, 0.28746068f, 0.10150419f, 0.64765805f, 0.93925524f, 0.6288832f, 0.5287214f, 0.6787367f, 0.7280878f, 0.8089835f, 0.45152652f, 0.28626585f, 0.37735057f, 0.84606636f, 0.17912877f, 0.1262947f, 0.93639624f, 0.74632484f, 0.10586514f, 0.2034781f, 0.3999192f, 0.6237884f, 0.58933526f, 0.11924875f, 0.16451561f, 0.5822025f, 0.3976624f, 0.9056206f, 0.66830647f, 0.801052f, 0.6321766f, 0.47481045f, 0.6505067f, 0.5119758f, 0.8057609f, 0.059799645f, 0.014172987f, 0.637021f, 0.878043f, 0.19765095f, 0.7158634f, 0.6288858f, 0.41249686f, 0.2579455f, 0.32608235f, 0.153792f, 0.030521471f, 0.5082303f, 0.33682522f, 0.5155604f, 0.8285316f, 0.7492474f, 0.56472075f, 0.7964325f, 0.8807934f, 0.21563967f, 0.67301345f, 0.32791767f, 0.47523862f}; -}; - - -class EuclideanDistanceTest : public testing::Test { -public: - float x[16] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}; - float y[16] = {2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f}; - float result[9] = {0.f}; - Nd4jLong shapeBuffer[12] = {4,2,2,2,2,8,4,2,1,0,1,99}; - int dimensionLength = 3; - int dimension[3] = {1,2,3}; - float extraVals[2] = 
{0.f, 0.f}; - int opNum = 1; - - std::vector dim = {1, 2, 3}; -}; - -#ifndef __CUDABLAS__ -TEST_F(EuclideanDistanceTest,Test1) { - //int *tadShapeBuffer = shape::computeResultShape(shapeBuffer,dimension,dimensionLength); - sd::ArrayOptions::setDataType(shapeBuffer, sd::DataType::FLOAT32); - auto tadShapeBuffer = sd::ShapeUtils::evalReduceShapeInfo('c', dim, shapeBuffer, false, true, nullptr); - //shape::printShapeInfoLinear("tadShape", tadShapeBuffer); - functions::reduce3::Reduce3::exec(opNum, - x, - shapeBuffer, - extraVals, - y, - shapeBuffer, - result, - tadShapeBuffer, - dimension, - dimensionLength, 0, 2); - - ASSERT_EQ(result[1],result[0]); -} - - -TEST_F(StdTest,MultiDimTest) { - auto xShapeInfo = shape::shapeBuffer(4, sd::DataType::FLOAT32, examplesShape); - //int *resultShapeInfo = shape::computeResultShape(xShapeInfo,dimensionsForStd,dimensionLength); - auto resultShapeInfo = sd::ShapeUtils::evalReduceShapeInfo('c', dimsForStd, xShapeInfo, false, true, nullptr); - int resultLengthAssertion = 5; - ASSERT_EQ(resultLengthAssertion,shape::length(resultShapeInfo)); - shape::TAD *tad = new shape::TAD; - tad->init(xShapeInfo,dimensionsForStd,dimensionLength); - float none[1] = {0.f}; - tad->createTadOnlyShapeInfo(); - tad->createOffsets(); - int tadElementWiseStride = shape::elementWiseStride(tad->tadOnlyShapeInfo); - ASSERT_EQ(0,tadElementWiseStride); - float *result = new float[shape::length(resultShapeInfo)]; - functions::reduce::ReduceFloatFunction::exec( - opNum, - x, - xShapeInfo, - none, - result, - resultShapeInfo, - dimensionsForStd, - dimensionLength, - tad->tadOnlyShapeInfo, - tad->tadOffsets, 0, shape::length(resultShapeInfo)); - - // for(int i = 0; i < shape::length(resultShapeInfo); i++) - // printf("%f\n",result[i]); - - delete[] result; - delete tad; - delete[] xShapeInfo; -} - - - - - -TEST_F(ReduceTest,MatrixTest) { - int opNum = 4; - auto xShapeInfo = sd::ShapeBuilders::createShapeInfo(sd::DataType::FLOAT32, 'c', 2, shape); - //int 
*resultShapeInfo = shape::computeResultShape(xShapeInfo,dimension,dimensionLength); - auto resultShapeInfo = sd::ShapeUtils::evalReduceShapeInfo('c', dim, xShapeInfo, false, true, nullptr); - int resultLengthAssertion = 3; - ASSERT_EQ(resultLengthAssertion,shape::length(resultShapeInfo)); - shape::TAD *tad = new shape::TAD; - tad->init(xShapeInfo,dimension,dimensionLength); - float none[1] = {0.f}; - tad->createTadOnlyShapeInfo(); - tad->createOffsets(); - auto tadElementWiseStride = shape::elementWiseStride(tad->tadOnlyShapeInfo); - ASSERT_EQ(3,tadElementWiseStride); - functions::reduce::ReduceFloatFunction::exec( - opNum, - x, - xShapeInfo, - none, - result, - resultShapeInfo, - dimension, - dimensionLength, - tad->tadOnlyShapeInfo, - tad->tadOffsets, 0, tad->numTads); - - // for(int i = 0; i < shape::length(resultShapeInfo); i++) - // printf("%f\n",result[i]); - - delete tad; - delete[] xShapeInfo; -} - -#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp b/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp index 36fce0dd9..25f4f2c18 100644 --- a/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp @@ -60,7 +60,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_1) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong *newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); ASSERT_TRUE(shape::equalsStrict(expShapeInfo, newShapeInfo)); @@ -77,7 +77,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_2) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong *newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); ASSERT_TRUE(shape::equalsStrict(expShapeInfo, newShapeInfo)); @@ -94,7 +94,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_3) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong 
*newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); ASSERT_TRUE(shape::equalsStrict(expShapeInfo, newShapeInfo)); @@ -111,7 +111,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_4) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong *newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); //for(int i=0; i<2*newShapeInfo[0]+4; ++i) // std::cout<('c', {2,4,5}); std::vector dimensions = {1}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo()); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo()); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), newShapeInfo)); } ////////////////////////////////////////////////////////////////// @@ -141,9 +141,9 @@ TEST_F(ShapeUtilsTests, evalReduceShapeInfo_test2) auto expected = NDArrayFactory::create('c', {2,1,4,5}); std::vector dimensions = {1}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo(), true); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo(), true); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), newShapeInfo)); } ////////////////////////////////////////////////////////////////// @@ -154,9 +154,9 @@ TEST_F(ShapeUtilsTests, evalReduceShapeInfo_test3) auto expected = NDArrayFactory::create('c', {1,1,1,5}); std::vector dimensions = {0,1,2}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo(), true); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo(), true); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), 
newShapeInfo)); } @@ -168,9 +168,9 @@ TEST_F(ShapeUtilsTests, evalReduceShapeInfo_test4) auto expected = NDArrayFactory::create('c', {1,1,1,1}); std::vector dimensions = {0,1,2,3}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo(), true); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo(), true); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), newShapeInfo)); } TEST_F(ShapeUtilsTests, Test_Strings_1) { diff --git a/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp b/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp index becd5a21f..37f52568f 100644 --- a/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp +++ b/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp @@ -131,7 +131,7 @@ TEST_F(SparseUtilsTest, SortCOOindices_Test) { 34, 35, 36, 37, 38, 39 }); - sd::sparse::SparseUtils::sortCooIndicesGeneric(indicesArr, reinterpret_cast(values.getBuffer()), nnz, rank); + sd::sparse::SparseUtils::sortCooIndicesGeneric(indicesArr, reinterpret_cast(values.buffer()), nnz, rank); for ( int i = 0; i < rank * nnz; ++i){ ASSERT_EQ(expIndicesArr[i], indicesArr[i]); diff --git a/libnd4j/tests_cpu/layers_tests/TadTests.cpp b/libnd4j/tests_cpu/layers_tests/TadTests.cpp index 5dfdf401d..a2cdec003 100644 --- a/libnd4j/tests_cpu/layers_tests/TadTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/TadTests.cpp @@ -51,7 +51,7 @@ TEST_F(TadTests, Test4DTad1) { int dim = 1; shape::TAD tad; - tad.init(arrayBad->getShapeInfo(), &dim, 1); + tad.init(arrayBad->shapeInfo(), &dim, 1); tad.createTadOnlyShapeInfo(); tad.createOffsets(); @@ -70,10 +70,10 @@ TEST_F(TadTests, TestNumTads1) { std::vector dim({0}); - Nd4jLong tadLengthX = shape::tadLength(x.getShapeInfo(), dim.data(), dim.size()); + Nd4jLong tadLengthX = shape::tadLength(x.shapeInfo(), dim.data(), dim.size()); Nd4jLong numTadsX = x.lengthOf() / tadLengthX; - Nd4jLong tadLengthY = 
shape::tadLength(y.getShapeInfo(), dim.data(), dim.size()); + Nd4jLong tadLengthY = shape::tadLength(y.shapeInfo(), dim.data(), dim.size()); Nd4jLong numTadsY = y.lengthOf() / tadLengthY; ASSERT_EQ(2, tadLengthX); @@ -91,18 +91,18 @@ TEST_F(TadTests, TestShapeTad_1) { NDArray input(buff, shapeInfo); std::vector dimensions = {0,1,2}; - Nd4jLong tadLength = shape::tadLength(input.getShapeInfo(), dimensions.data(), dimensions.size()); + Nd4jLong tadLength = shape::tadLength(input.shapeInfo(), dimensions.data(), dimensions.size()); Nd4jLong numTads = input.lengthOf() / tadLength; shape::TAD tad; - tad.init(input.getShapeInfo(), dimensions.data(), dimensions.size()); + tad.init(input.shapeInfo(), dimensions.data(), dimensions.size()); tad.createTadOnlyShapeInfo(); tad.createOffsets(); auto tadShapeInfo = new Nd4jLong[shape::shapeInfoLength(tad.tadOnlyShapeInfo[0])]; std::memcpy(tadShapeInfo, tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - float* tadBuff = reinterpret_cast(input.getBuffer()) + tad.tadOffsets[0]; + float* tadBuff = reinterpret_cast(input.buffer()) + tad.tadOffsets[0]; NDArray tadArr(tadBuff, tadShapeInfo); ASSERT_TRUE(numTads==1); @@ -296,7 +296,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n1[] = {20,25,30,35, 80,85,90,95}; int minIdx = 5; - int N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y1.getShapeInfo(), dimsToExclude1.data()); + int N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y1.shapeInfo(), dimsToExclude1.data()); ASSERT_TRUE(N == x.lengthOf()/y1.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n1[i] == maxIdxs[i]); @@ -306,7 +306,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n2[] = {12,32,52, 72,92,112}; minIdx = 12; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y2.getShapeInfo(), dimsToExclude2.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y2.shapeInfo(), dimsToExclude2.data()); ASSERT_TRUE(N == 
x.lengthOf()/y2.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n2[i] == maxIdxs[i]); @@ -316,7 +316,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n3[] = {64,69,74,79,84,89,94,99,104,109,114,119}; minIdx = 9; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y3.getShapeInfo(), dimsToExclude3.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y3.shapeInfo(), dimsToExclude3.data()); ASSERT_TRUE(N == x.lengthOf()/y3.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n3[i] == maxIdxs[i]); @@ -326,7 +326,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n4[] = {20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39}; minIdx = 1; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y4.getShapeInfo(), dimsToExclude4.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y4.shapeInfo(), dimsToExclude4.data()); ASSERT_TRUE(N == x.lengthOf()/y4.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n4[i] == maxIdxs[i]); @@ -336,7 +336,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n5[] = {65,66,67,68,69, 85,86,87,88,89, 105,106,107,108,109}; minIdx = 5; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y5.getShapeInfo(), dimsToExclude5.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y5.shapeInfo(), dimsToExclude5.data()); ASSERT_TRUE(N == x.lengthOf()/y5.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n5[i] == maxIdxs[i]); @@ -346,7 +346,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n6[] = {65,66,67,68,69}; minIdx = 13; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y6.getShapeInfo(), dimsToExclude6.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y6.shapeInfo(), dimsToExclude6.data()); ASSERT_TRUE(N == x.lengthOf()/y6.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n6[i] == maxIdxs[i]); @@ -356,7 +356,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n7[] = {15,16,17,18,19, 
35,36,37,38,39, 55,56,57,58,59, 75,76,77,78,79, 95,96,97,98,99, 115,116,117,118,119}; minIdx = 3; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y7.getShapeInfo(), dimsToExclude7.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y7.shapeInfo(), dimsToExclude7.data()); ASSERT_TRUE(N == x.lengthOf()/y7.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n7[i] == maxIdxs[i]); @@ -366,7 +366,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n8[] = {0,5,10,15, 20,25,30,35, 40,45,50,55, 60,65,70,75, 80,85,90,95, 100,105,110,115}; minIdx = 0; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y8.getShapeInfo(), dimsToExclude8.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y8.shapeInfo(), dimsToExclude8.data()); ASSERT_TRUE(N == x.lengthOf()/y8.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n8[i] == maxIdxs[i]); @@ -376,7 +376,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n9[] = {60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119}; minIdx = 1; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y9.getShapeInfo(), dimsToExclude9.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y9.shapeInfo(), dimsToExclude9.data()); ASSERT_TRUE(N == x.lengthOf()/y9.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n9[i] == maxIdxs[i]); @@ -386,7 +386,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n10[] = {11, 71}; minIdx = 11; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y10.getShapeInfo(), dimsToExclude10.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y10.shapeInfo(), dimsToExclude10.data()); ASSERT_TRUE(N == x.lengthOf()/y10.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n10[i] == maxIdxs[i]); @@ -396,7 +396,7 @@ TEST_F(TadTests, 
outerArrayIndexes_1) { const int n11[] = {66, 86, 106}; minIdx = 26; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y11.getShapeInfo(), dimsToExclude11.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y11.shapeInfo(), dimsToExclude11.data()); ASSERT_TRUE(N == x.lengthOf()/y11.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n11[i] == maxIdxs[i]); @@ -406,7 +406,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n12[] = {0,2,4,5,7,9,10,12,14,15,17,19,60,62,64,65,67,69,70,72,74,75,77,79}; minIdx = 0; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y12.getShapeInfo(), dimsToExclude12.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y12.shapeInfo(), dimsToExclude12.data()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n12[i] == maxIdxs[i]); @@ -415,7 +415,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n13[] = {1,3,6,8,11,13,16,18,61,63,66,68,71,73,76,78}; minIdx = 1; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y13.getShapeInfo(), dimsToExclude13.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y13.shapeInfo(), dimsToExclude13.data()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n13[i] == maxIdxs[i]); @@ -423,7 +423,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n14[] = {12,32,52, 72,92,112}; minIdx = 12; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y14.getShapeInfo(), nullptr); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y14.shapeInfo(), nullptr); ASSERT_TRUE(N == x.lengthOf()/y14.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n14[i] == maxIdxs[i]); @@ -432,7 +432,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n15[] = {11, 71}; minIdx = 11; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y15.getShapeInfo(), nullptr); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y15.shapeInfo(), nullptr); ASSERT_TRUE(N == x.lengthOf()/y15.lengthOf()); for(int i 
= 0; i < N; ++i) ASSERT_TRUE(n15[i] == maxIdxs[i]);