batch_size | ReadWriter< in_T, out_T, step, four_step_real > | |
buf | ReadWriter< in_T, out_T, step, four_step_real > | |
compute_strided_indices(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | inline |
elem | ReadWriter< in_T, out_T, step, four_step_real > | |
elems_per_thread | ReadWriter< in_T, out_T, step, four_step_real > | |
grid | ReadWriter< in_T, out_T, step, four_step_real > | |
in | ReadWriter< in_T, out_T, step, four_step_real > | |
inv | ReadWriter< in_T, out_T, step, four_step_real > | |
load() const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
load() const | ReadWriter< in_T, out_T, step, four_step_real > | |
load() const | ReadWriter< in_T, out_T, step, four_step_real > | |
load_padded(int length, const device float2 *w_k) const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
load_padded(int length, const device float2 *w_k) const | ReadWriter< in_T, out_T, step, four_step_real > | |
load_padded(int length, const device float2 *w_k) const | ReadWriter< in_T, out_T, step, four_step_real > | |
load_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | inline |
load_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |
load_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |
load_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |
load_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |
n | ReadWriter< in_T, out_T, step, four_step_real > | |
out | ReadWriter< in_T, out_T, step, four_step_real > | |
out_of_bounds() const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
out_of_bounds() const | ReadWriter< in_T, out_T, step, four_step_real > | |
out_of_bounds() const | ReadWriter< in_T, out_T, step, four_step_real > | |
post_in(float2 elem) const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
post_in(float elem) const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
pre_out(float2 elem) const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
pre_out(float2 elem, int length) const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
ReadWriter(const device in_T *in_, threadgroup float2 *buf_, device out_T *out_, const short n_, const int batch_size_, const short elems_per_thread_, const uint3 elem_, const uint3 grid_, const bool inv_) | ReadWriter< in_T, out_T, step, four_step_real > | inline |
strided_device_idx | ReadWriter< in_T, out_T, step, four_step_real > | |
strided_shared_idx | ReadWriter< in_T, out_T, step, four_step_real > | |
threads_per_tg | ReadWriter< in_T, out_T, step, four_step_real > | |
write() const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
write() const | ReadWriter< in_T, out_T, step, four_step_real > | |
write() const | ReadWriter< in_T, out_T, step, four_step_real > | |
write_padded(int length, const device float2 *w_k) const | ReadWriter< in_T, out_T, step, four_step_real > | inline |
write_padded(int length, const device float2 *w_k) const | ReadWriter< in_T, out_T, step, four_step_real > | |
write_padded(int length, const device float2 *w_k) const | ReadWriter< in_T, out_T, step, four_step_real > | |
write_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | inline |
write_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |
write_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |
write_strided(int stride, int overall_n) | ReadWriter< in_T, out_T, step, four_step_real > | |