mirror of https://github.com/ml-explore/mlx.git
Transposed Convolution (#1245)
* initial implementation for conv_transpose: ran pre-commit, implemented conv_transpose, updated conv_general docstring, updated code comments, removed commented run_conv_checks, updated acknowledgments, added missing entry to ops.rst, added op to nn.layers, resolved merge conflicts
* removed ConvolutionTranspose primitive as suggested by reviewer
* remove transpose flag, add another test

Co-authored-by: Awni Hannun <awni@apple.com>
commit efeb9c0f02 (parent ba3e913c7a), committed by GitHub
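For context, here is a minimal usage sketch of the layers this commit introduces. It is not part of the diff; the import aliases, the concrete sizes, and the output-shape arithmetic (which assumes the usual transposed-convolution size formula) are illustrative assumptions.

import mlx.core as mx
import mlx.nn as nn

# Hypothetical sizes, chosen only to show the NHWC layout the layer expects.
layer = nn.ConvTranspose2d(in_channels=8, out_channels=3, kernel_size=4, stride=2, padding=1)
x = mx.random.normal(shape=(1, 16, 16, 8))  # (N, H, W, C_in)
y = layer(x)
# Under the standard transposed-convolution convention the spatial size is
# (16 - 1) * 2 - 2 * 1 + (4 - 1) + 1 = 32, so y should be (1, 32, 32, 3).
print(y.shape)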
@@ -55,6 +55,11 @@ from mlx.nn.layers.activations import (
 from mlx.nn.layers.base import Module
 from mlx.nn.layers.containers import Sequential
 from mlx.nn.layers.convolution import Conv1d, Conv2d, Conv3d
+from mlx.nn.layers.convolution_transpose import (
+    ConvTranspose1d,
+    ConvTranspose2d,
+    ConvTranspose3d,
+)
 from mlx.nn.layers.dropout import Dropout, Dropout2d, Dropout3d
 from mlx.nn.layers.embedding import Embedding
 from mlx.nn.layers.linear import Bilinear, Identity, Linear
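Assuming mlx.nn keeps re-exporting the contents of mlx.nn.layers (as it does for the existing layers), this hunk is what makes the new classes reachable at the package level, as used in the sketch above:

import mlx.nn as nn

print(nn.ConvTranspose1d, nn.ConvTranspose2d, nn.ConvTranspose3d)  # should all resolve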
@@ -21,9 +21,9 @@ class Conv1d(Module):
         out_channels (int): The number of output channels
         kernel_size (int): The size of the convolution filters
         stride (int, optional): The stride when applying the filter.
-            Default: 1.
+            Default: ``1``.
         padding (int, optional): How many positions to 0-pad the input with.
-            Default: 0.
+            Default: ``0``.
         dilation (int, optional): The dilation of the convolution.
         bias (bool, optional): If ``True`` add a learnable bias to the output.
             Default: ``True``
@@ -84,9 +84,9 @@ class Conv2d(Module):
         out_channels (int): The number of output channels.
         kernel_size (int or tuple): The size of the convolution filters.
         stride (int or tuple, optional): The size of the stride when
-            applying the filter. Default: 1.
+            applying the filter. Default: ``1``.
         padding (int or tuple, optional): How many positions to 0-pad
-            the input with. Default: 0.
+            the input with. Default: ``0``.
         dilation (int or tuple, optional): The dilation of the convolution.
         bias (bool, optional): If ``True`` add a learnable bias to the
             output. Default: ``True``
python/mlx/nn/layers/convolution_transpose.py (new file, 206 lines)
@@ -0,0 +1,206 @@
# Copyright © 2023 Apple Inc.

import math
from typing import Union

import mlx.core as mx
from mlx.nn.layers.base import Module


class ConvTranspose1d(Module):
    """Applies a 1-dimensional transposed convolution over the multi-channel input sequence.

    The channels are expected to be last i.e. the input shape should be ``NLC`` where:

    * ``N`` is the batch dimension
    * ``L`` is the sequence length
    * ``C`` is the number of input channels

    Args:
        in_channels (int): The number of input channels
        out_channels (int): The number of output channels
        kernel_size (int): The size of the convolution filters
        stride (int, optional): The stride when applying the filter.
            Default: ``1``.
        padding (int, optional): How many positions to 0-pad the input with.
            Default: ``0``.
        dilation (int, optional): The dilation of the convolution.
        bias (bool, optional): If ``True`` add a learnable bias to the output.
            Default: ``True``
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        dilation: int = 1,
        bias: bool = True,
    ):
        super().__init__()

        scale = math.sqrt(1 / (in_channels * kernel_size))
        self.weight = mx.random.uniform(
            low=-scale,
            high=scale,
            shape=(out_channels, kernel_size, in_channels),
        )
        if bias:
            self.bias = mx.zeros((out_channels,))

        self.padding = padding
        self.dilation = dilation
        self.stride = stride

    def _extra_repr(self):
        return (
            f"{self.weight.shape[-1]}, {self.weight.shape[0]}, "
            f"kernel_size={self.weight.shape[1]}, stride={self.stride}, "
            f"padding={self.padding}, dilation={self.dilation}, "
            f"bias={'bias' in self}"
        )

    def __call__(self, x):
        y = mx.conv_transpose1d(
            x, self.weight, self.stride, self.padding, self.dilation
        )
        if "bias" in self:
            y = y + self.bias
        return y


class ConvTranspose2d(Module):
    """Applies a 2-dimensional transposed convolution over the multi-channel input image.

    The channels are expected to be last i.e. the input shape should be ``NHWC`` where:

    * ``N`` is the batch dimension
    * ``H`` is the input image height
    * ``W`` is the input image width
    * ``C`` is the number of input channels

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        kernel_size (int or tuple): The size of the convolution filters.
        stride (int or tuple, optional): The size of the stride when
            applying the filter. Default: ``1``.
        padding (int or tuple, optional): How many positions to 0-pad
            the input with. Default: ``0``.
        dilation (int or tuple, optional): The dilation of the convolution.
        bias (bool, optional): If ``True`` add a learnable bias to the
            output. Default: ``True``
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, tuple],
        stride: Union[int, tuple] = 1,
        padding: Union[int, tuple] = 0,
        dilation: Union[int, tuple] = 1,
        bias: bool = True,
    ):
        super().__init__()

        kernel_size, stride, padding = map(
            lambda x: (x, x) if isinstance(x, int) else x,
            (kernel_size, stride, padding),
        )
        scale = math.sqrt(1 / (in_channels * kernel_size[0] * kernel_size[1]))
        self.weight = mx.random.uniform(
            low=-scale,
            high=scale,
            shape=(out_channels, *kernel_size, in_channels),
        )
        if bias:
            self.bias = mx.zeros((out_channels,))

        self.padding = padding
        self.stride = stride
        self.dilation = dilation

    def _extra_repr(self):
        return (
            f"{self.weight.shape[-1]}, {self.weight.shape[0]}, "
            f"kernel_size={self.weight.shape[1:2]}, stride={self.stride}, "
            f"padding={self.padding}, dilation={self.dilation}, "
            f"bias={'bias' in self}"
        )

    def __call__(self, x):
        y = mx.conv_transpose2d(
            x, self.weight, self.stride, self.padding, self.dilation
        )
        if "bias" in self:
            y = y + self.bias
        return y


class ConvTranspose3d(Module):
    """Applies a 3-dimensional transposed convolution over the multi-channel input image.

    The channels are expected to be last i.e. the input shape should be ``NDHWC`` where:

    * ``N`` is the batch dimension
    * ``D`` is the input image depth
    * ``H`` is the input image height
    * ``W`` is the input image width
    * ``C`` is the number of input channels

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        kernel_size (int or tuple): The size of the convolution filters.
        stride (int or tuple, optional): The size of the stride when
            applying the filter. Default: ``1``.
        padding (int or tuple, optional): How many positions to 0-pad
            the input with. Default: ``0``.
        bias (bool, optional): If ``True`` add a learnable bias to the
            output. Default: ``True``
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, tuple],
        stride: Union[int, tuple] = 1,
        padding: Union[int, tuple] = 0,
        bias: bool = True,
    ):
        super().__init__()

        kernel_size, stride, padding = map(
            lambda x: (x, x, x) if isinstance(x, int) else x,
            (kernel_size, stride, padding),
        )
        scale = math.sqrt(
            1 / (in_channels * kernel_size[0] * kernel_size[1] * kernel_size[2])
        )
        self.weight = mx.random.uniform(
            low=-scale,
            high=scale,
            shape=(out_channels, *kernel_size, in_channels),
        )
        if bias:
            self.bias = mx.zeros((out_channels,))

        self.padding = padding
        self.stride = stride

    def _extra_repr(self):
        return (
            f"{self.weight.shape[-1]}, {self.weight.shape[0]}, "
            f"kernel_size={self.weight.shape[1:3]}, stride={self.stride}, "
            f"padding={self.padding}, bias={'bias' in self}"
        )

    def __call__(self, x):
        y = mx.conv_transpose3d(x, self.weight, self.stride, self.padding)
        if "bias" in self:
            y = y + self.bias
        return y
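The layers above delegate to the op-level functions mx.conv_transpose1d, mx.conv_transpose2d, and mx.conv_transpose3d. Below is a minimal sketch of calling the 1-D op directly, using the same weight layout as ConvTranspose1d above; the concrete sizes and the output-length formula are illustrative assumptions, not part of the commit.

import mlx.core as mx

x = mx.random.normal(shape=(4, 16, 8))  # (N, L, C_in)
w = mx.random.normal(shape=(3, 5, 8))   # (C_out, kernel_size, C_in), matching the layer's weight layout
y = mx.conv_transpose1d(x, w, 2, 1, 1)  # stride=2, padding=1, dilation=1, as in __call__ above
# Standard transposed-convolution output length:
# (16 - 1) * 2 - 2 * 1 + 1 * (5 - 1) + 1 = 33, so y should be (4, 33, 3).
print(y.shape)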