mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-25 01:41:19 +08:00

This commit introduces a comprehensive memory estimation utility for MLX language models, supporting:

- Dynamic parameter calculation across diverse model architectures
- Handling of quantized and standard models
- Estimation of model weights, KV cache, and overhead memory
- Support for bounded and unbounded KV cache modes
- Flexible configuration via command-line arguments

The new tool provides detailed memory usage insights for different model configurations and generation scenarios.
16 lines
295 B
Python
16 lines
295 B
Python
# Copyright © 2023-2024 Apple Inc.
|
|
|
|
import os
|
|
|
|
from ._version import __version__
|
|
|
|
# Placed ahead of the `.utils` import below, so the variable is already set
# when that module (and whatever it imports) is loaded.
# NOTE(review): presumably this silences advisory warnings emitted by the
# `transformers` library -- confirm against its documentation.
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
|
|
|
from .utils import convert, generate, load, stream_generate
|
|
|
|
|
|
def get_estimate_mem():
    """Return the ``estimate_mem`` object from the ``estimate_memory`` submodule.

    The import statement lives inside the function body, so it runs when
    this accessor is called rather than when the package is imported.
    """
    from .estimate_memory import estimate_mem as _estimate_mem

    return _estimate_mem
|