mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-08 18:06:37 +08:00
Stable Diffusion: Input image downsampling (#276)
This commit is contained in:
parent
2ba5d3db14
commit
b4c20cc7f7
@ -67,7 +67,7 @@ Image 2 Image
|
|||||||
There is also the option of generating images based on another image using the
|
There is also the option of generating images based on another image using the
|
||||||
example script `image2image.py`. To do that an image is first encoded using the
|
example script `image2image.py`. To do that an image is first encoded using the
|
||||||
autoencoder to get its latent representation and then noise is added according
|
autoencoder to get its latent representation and then noise is added according
|
||||||
to the forward diffusion process and the `strength` parameter. A `stregnth` of
|
to the forward diffusion process and the `strength` parameter. A `strength` of
|
||||||
0.0 means no noise and a `strength` of 1.0 means starting from completely
|
0.0 means no noise and a `strength` of 1.0 means starting from completely
|
||||||
random noise.
|
random noise.
|
||||||
|
|
||||||
@ -78,6 +78,7 @@ The command to generate the above images is:
|
|||||||
|
|
||||||
python image2image.py --strength 0.5 original.png 'A lit fireplace'
|
python image2image.py --strength 0.5 original.png 'A lit fireplace'
|
||||||
|
|
||||||
|
*Note: `image2image.py` will automatically downsample your input image to guarantee that its dimensions are divisible by 64. If you want full control of this process, resize your image prior to using the script.*
|
||||||
|
|
||||||
Performance
|
Performance
|
||||||
-----------
|
-----------
|
||||||
|
@ -28,7 +28,15 @@ if __name__ == "__main__":
|
|||||||
sd = StableDiffusion()
|
sd = StableDiffusion()
|
||||||
|
|
||||||
# Read the image
|
# Read the image
|
||||||
img = mx.array(np.array(Image.open(args.image)))
|
img = Image.open(args.image)
|
||||||
|
|
||||||
|
# Make sure image shape is divisible by 64
|
||||||
|
W, H = (dim - dim % 64 for dim in (img.width, img.height))
|
||||||
|
if W != img.width or H != img.height:
|
||||||
|
print(f"Warning: image shape is not divisible by 64, downsampling to {W}x{H}")
|
||||||
|
img = img.resize((W, H), Image.NEAREST) # use desired downsampling filter
|
||||||
|
|
||||||
|
img = mx.array(np.array(img))
|
||||||
img = (img[:, :, :3].astype(mx.float32) / 255) * 2 - 1
|
img = (img[:, :, :3].astype(mx.float32) / 255) * 2 - 1
|
||||||
|
|
||||||
# Noise and denoise the latents produced by encoding img.
|
# Noise and denoise the latents produced by encoding img.
|
||||||
|
Loading…
Reference in New Issue
Block a user