+ + +mlx.core.block_masked_mm# + + +block_masked_mm(a: array, b: array, /, block_size: int = 64, mask_out: array, mask_lhs: array, mask_rhs: array, *, stream: Union[None, Stream, Device] = None) → array# +Matrix multiplication with block masking. +Perform the (possibly batched) matrix multiplication of two arrays, with blocks +of size block_size x block_size optionally masked out. +Assuming a has shape (…, M, K) and b has shape (…, K, N): + +mask_lhs must have shape (…, \(\lceil\) M / block_size \(\rceil\), \(\lceil\) K / block_size \(\rceil\)) +mask_rhs must have shape (…, \(\lceil\) K / block_size \(\rceil\), \(\lceil\) N / block_size \(\rceil\)) +mask_out must have shape (…, \(\lceil\) M / block_size \(\rceil\), \(\lceil\) N / block_size \(\rceil\)) + +Note: Only block_size=64 and block_size=32 are currently supported. + +Parameters: + +a (array) – Input array or scalar. +b (array) – Input array or scalar. +block_size (int) – Size of blocks to be masked. Must be 32 or 64 (default: 64) +mask_out (array, optional) – Boolean mask for output (default: None) +mask_lhs (array, optional) – Boolean mask for a (default: None) +mask_rhs (array, optional) – Boolean mask for b (default: None) + + + + + + + + +
+ + +mlx.core.degrees# + + +degrees(a: array, /, *, stream: None | Stream | Device = None) → array# +Convert angles from radians to degrees. + +Parameters: +a (array) – Input array. + +Returns: +The angles in degrees. + +Return type: +array + + + + + + + +
+ + +mlx.core.metal.clear_cache# + + +clear_cache() → None# +Clear the memory cache. +After calling this, get_cache_memory() should return 0. + + + + + +
+ + +mlx.core.not_equal# + + +not_equal(a: scalar | array, b: scalar | array, stream: None | Stream | Device = None) → array# +Element-wise not equal. +Not equal comparison on two arrays with numpy-style broadcasting semantics. +Either or both input arrays can also be scalars. + +Parameters: + +a (array) – Input array or scalar. +b (array) – Input array or scalar. + + +Returns: +The element-wise comparison a != b. + +Return type: +array + + + + + + + +
+ + +mlx.core.radians# + + +radians(a: array, /, *, stream: None | Stream | Device = None) → array# +Convert angles from degrees to radians. + +Parameters: +a (array) – Input array. + +Returns: +The angles in radians. + +Return type: +array + + + + + + + +
+ + +mlx.core.synchronize# + + +synchronize(stream: Optional[Stream] = None) → None# +Synchronize with the given stream. + +Parameters: +stream (Stream, optional) – The stream to synchronize with. If None +then the default stream of the default device is used. +Default: None. + + + + + + + +
+ + +mlx.nn.quantize# + + +quantize(model: Module, group_size: int = 64, bits: int = 4, class_predicate: callable | None = None)# +Quantize the sub-modules of a module according to a predicate. +By default all Linear and Embedding layers will be +quantized. Note also, the module is updated in-place. + +Parameters: + +model (Module) – The model whose leaf modules may be quantized. +group_size (int) – The quantization group size (see +mlx.core.quantize()). Default: 64. +bits (int) – The number of bits per parameter (see +mlx.core.quantize()). Default: 4. +class_predicate (Optional[Callable]) – A callable which receives the +Module path and Module itself and returns True if +it should be quantized and False otherwise. If None, then +all linear and embedding layers are quantized. Default: None. + + + + + + + + +
+ + +mlx.utils.tree_map_with_path# + + +tree_map_with_path(fn, tree, *rest, is_leaf=None, path=None)# +Applies fn to the path and leaves of the Python tree tree and +returns a new collection with the results. +This function is the same as tree_map() but the fn takes the path as +the first argument followed by the remaining tree nodes. + +Parameters: + +fn (callable) – The function that processes the leaves of the tree. +tree (Any) – The main Python tree that will be iterated upon. +rest (tuple[Any]) – Extra trees to be iterated together with tree. +is_leaf (callable, optional) – An optional callable that returns True +if the passed object is considered a leaf or False otherwise. + + +Returns: +A Python tree with the new values returned by fn. + + +Example +>>> from mlx.utils import tree_map_with_path +>>> tree = {"model": [{"w": 0, "b": 1}, {"w": 0, "b": 1}]} +>>> new_tree = tree_map_with_path(lambda path, _: print(path), tree) +model.0.w +model.0.b +model.1.w +model.1.b + + + + + + + +
+ + +mlx.nn.QuantizedEmbedding# + + +class QuantizedEmbedding(num_embeddings: int, dims: int, group_size: int = 64, bits: int = 4)# +The same as Embedding but with a quantized weight matrix. +QuantizedEmbedding also provides a from_embedding() +classmethod to convert embedding layers to QuantizedEmbedding +layers. + +Parameters: + +num_embeddings (int) – The number of possible discrete tokens that can be embedded. +Usually called the vocabulary size. +dims (int) – The dimensionality of the embeddings. +group_size (int, optional) – The group size to use for the quantized +weight. See quantize(). Default: 64. +bits (int, optional) – The bit width to use for the quantized weight. +See quantize(). Default: 4. + + + +Methods + + +as_linear(x) +Call the quantized embedding layer as a quantized linear layer. + +from_embedding(embedding_layer[, ...]) +Create a QuantizedEmbedding layer from an Embedding layer. + + + + + + + + +