| 
| template<typename T>  | 
| METAL_FUNC void  | thread_swap (thread T &a, thread T &b) | 
|   | 
| template<typename T, typename U, bool ARG_SORT, short BLOCK_THREADS, short N_PER_THREAD>  | 
| void  | block_sort (const device T *inp, device U *out, const constant int &size_sorted_axis, const constant int &in_stride_sorted_axis, const constant int &out_stride_sorted_axis, const constant int &in_stride_segment_axis, const constant int &out_stride_segment_axis, uint3 tid, uint3 lid) | 
|   | 
| template<typename T, typename U, bool ARG_SORT, short BLOCK_THREADS, short N_PER_THREAD>  | 
| void  | block_sort_nc (const device T *inp, device U *out, const constant int &size_sorted_axis, const constant int &in_stride_sorted_axis, const constant int &out_stride_sorted_axis, const constant int &nc_dim, const constant int *nc_shape, const constant int64_t *in_nc_strides, const constant int64_t *out_nc_strides, uint3 tid, uint3 lid) | 
|   | 
| template<typename ValT, typename IdxT, bool ARG_SORT, short BLOCK_THREADS, short N_PER_THREAD>  | 
| void  | mb_block_sort (const device ValT *inp, device ValT *out_vals, device IdxT *out_idxs, const constant int &size_sorted_axis, const constant int &stride_sorted_axis, const constant int &nc_dim, const constant int *nc_shape, const constant int64_t *nc_strides, uint3 tid, uint3 lid) | 
|   | 
| template<typename ValT, typename IdxT, bool ARG_SORT, short BLOCK_THREADS, short N_PER_THREAD>  | 
| void  | mb_block_partition (device IdxT *block_partitions, const device ValT *dev_vals, const device IdxT *dev_idxs, const constant int &size_sorted_axis, const constant int &merge_tiles, const constant int &n_blocks, uint3 tid, uint3 lid, uint3 tgp_dims) | 
|   | 
| template<typename ValT, typename IdxT, bool ARG_SORT, short BLOCK_THREADS, short N_PER_THREAD, typename CompareOp = LessThan<ValT>>  | 
| void  | mb_block_merge (const device IdxT *block_partitions, const device ValT *dev_vals_in, const device IdxT *dev_idxs_in, device ValT *dev_vals_out, device IdxT *dev_idxs_out, const constant int &size_sorted_axis, const constant int &merge_tiles, const constant int &num_tiles, uint3 tid, uint3 lid) | 
|   |