mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-24 17:31:16 +08:00

Launch 2D grid to eliminate divide and mod in device code, since 64b integer division is very expensive. Github Issue #506 Co-authored-by: Vijay Krishnamoorthy <vijay_krish@apple.com>
35 lines
681 B
Python
35 lines
681 B
Python
# Copyright © 2023-2024 Apple Inc.
|
|
|
|
import time
|
|
|
|
import mlx.core as mx
|
|
|
|
|
|
def time_fn(fn, *args, **kwargs):
    """Benchmark ``fn`` and print its mean time per call in milliseconds.

    ``fn(*args, **kwargs)`` is invoked 5 times as untimed warmup and then
    100 times inside the timed region. Every result is handed to
    ``mx.eval`` (presumably so lazily-built MLX computations are actually
    executed inside the measured region -- confirm against the MLX docs).

    Args:
        fn: Callable to benchmark. Plain functions are labelled by their
            ``__name__``; callables without one (e.g. ``functools.partial``
            objects) are labelled by their ``repr``.
        *args: Positional arguments forwarded to ``fn`` on every call.
        **kwargs: Keyword arguments forwarded to ``fn`` on every call.

    Returns:
        None. The label and the timing are written to stdout on one line.
    """
    # Not every callable carries __name__ (functools.partial, instances
    # defining __call__); fall back to repr() instead of raising.
    label = getattr(fn, "__name__", repr(fn))
    print(f"Timing {label} ...", end=" ")

    # warmup
    for _ in range(5):
        mx.eval(fn(*args, **kwargs))

    num_iters = 100
    tic = time.perf_counter()
    for _ in range(num_iters):
        mx.eval(fn(*args, **kwargs))
    toc = time.perf_counter()

    # Seconds for the whole loop -> milliseconds per iteration.
    msec = 1e3 * (toc - tic) / num_iters
    print(f"{msec:.5f} msec")
|
|
|
|
|
|
def measure_runtime(fn, **kwargs):
    """Return the mean runtime of ``fn(**kwargs)`` in milliseconds.

    ``fn`` is called 5 times as untimed warmup, then 10 more times inside
    the timed region. The return value of ``fn`` is discarded.

    Args:
        fn: Callable to benchmark; invoked with keyword arguments only.
        **kwargs: Keyword arguments forwarded to ``fn`` on every call.

    Returns:
        float: Average elapsed time per timed call, in milliseconds.
    """
    # Warmup
    for _ in range(5):
        fn(**kwargs)

    iters = 10
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted and may be too coarse for short runs.
    tic = time.perf_counter()
    for _ in range(iters):
        fn(**kwargs)
    elapsed = time.perf_counter() - tic

    # Seconds for the whole loop -> milliseconds per iteration.
    return elapsed * 1000 / iters
|