fix gemv regression (#2445)

This commit is contained in:
Awni Hannun
2025-07-30 14:23:01 -07:00
committed by GitHub
parent b405591249
commit d32519c8ee
3 changed files with 36 additions and 9 deletions

View File

@@ -43,10 +43,18 @@ struct alignas(sizeof(T) * N) AlignedVector {
};
// Returns true when the address held by `x` is a multiple of
// N * sizeof(T) bytes, i.e. the byte size of N consecutive elements of T.
//
// Declared __host__ __device__ so the same check can be evaluated on the
// host as well as inside device code. Only the pointer value is inspected;
// the pointee is never dereferenced.
template <int N, typename T>
inline __host__ __device__ bool is_aligned(T* x) {
  return (reinterpret_cast<uintptr_t>(x) % (N * sizeof(T))) == 0;
}
// Reads one AlignedVector<T, N> from `ptr` without any validation.
//
// `ptr` is reinterpreted as a pointer to AlignedVector<T, N>, so `offset`
// counts whole vectors rather than individual T elements. This function
// performs no alignment or bounds check itself: the caller must guarantee
// that `ptr` satisfies the alignment of AlignedVector<T, N> and that the
// addressed vector is readable.
template <int N, typename T>
inline __device__ AlignedVector<T, N> unsafe_load_vector(
    const T* ptr,
    uint32_t offset) {
  using Vec = AlignedVector<T, N>;
  const Vec* vectors = reinterpret_cast<const Vec*>(ptr);
  return *(vectors + offset);
}
template <int N, typename T>
inline __device__ AlignedVector<T, N> load_vector(
const T* ptr,
@@ -101,6 +109,13 @@ inline __device__ AlignedVector<T, N> load_vector(
}
}
// Writes `vec` as one AlignedVector<T, N> into `ptr` without any validation.
//
// `ptr` is reinterpreted as a pointer to AlignedVector<T, N>, so `offset`
// counts whole vectors rather than individual T elements. This function
// performs no alignment or bounds check itself: the caller must guarantee
// that `ptr` satisfies the alignment of AlignedVector<T, N> and that the
// addressed vector is writable.
template <int N, typename T>
inline __device__ void
unsafe_store_vector(T* ptr, uint32_t offset, const AlignedVector<T, N>& vec) {
  using Vec = AlignedVector<T, N>;
  Vec* vectors = reinterpret_cast<Vec*>(ptr);
  *(vectors + offset) = vec;
}
template <int N, typename T>
inline __device__ void
store_vector(T* ptr, uint32_t offset, const AlignedVector<T, N>& vec) {