|
template<typename T, typename U, typename Op, typename IdxT, int NDIMS> |
void | col_reduce_small (const device T *in, device U *out, const constant size_t &reduction_size, const constant int64_t &reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &reduce_ndim, const constant size_t &non_col_reductions, uint3 gid, uint3 gsize, uint3 lid, uint3 lsize) |
|
template<typename T, typename U, typename Op, typename IdxT, int NDIMS> |
void | col_reduce_longcolumn (const device T *in, device U *out, const constant size_t &reduction_size, const constant size_t &reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &reduce_ndim, const constant size_t &non_col_reductions, const constant size_t &out_size, uint3 gid, uint3 gsize, uint3 lid, uint3 lsize) |
|
template<typename T, typename U, typename Op, typename IdxT, int NDIMS, int BM, int BN> |
void | col_reduce_looped (const device T *in, device U *out, const constant size_t &reduction_size, const constant int64_t &reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &reduce_ndim, const constant size_t &non_col_reductions, uint3 gid, uint3 gsize, uint simd_lane_id, uint simd_group_id) |
| Reduces columns using a simple looped approach.
|
|
template<typename T, typename U, typename Op, typename IdxT, int NDIMS, int BM, int BN> |
void | col_reduce_2pass (const device T *in, device U *out, const constant size_t &reduction_size, const constant int64_t &reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &reduce_ndim, const constant size_t &non_col_reductions, const constant size_t &out_size, uint3 gid, uint3 gsize, uint simd_lane_id, uint simd_group_id) |
|