FLUX: move cli to mlx_flux dir

2025-08-30 02:53:41 +08:00 · 2024-11-07 12:35:49 +08:00 · 2024-11-07 12:35:49 +08:00 · e61849a003
commit e61849a003
parent 83c92c2a11
2 changed files with 29 additions and 15 deletions
--- a/flux/mlx_flux/dreambooth.py
+++ b/flux/mlx_flux/dreambooth.py
@ -1,19 +1,20 @@
 # Copyright © 2024 Apple Inc.

 import argparse
-import time
-from functools import partial
-from pathlib import Path
-
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
 import numpy as np
+import time
+from PIL import Image
+from functools import partial
 from mlx.nn.utils import average_gradients
 from mlx.utils import tree_flatten, tree_map, tree_reduce
-from PIL import Image
+from pathlib import Path

-from mlx_flux import FluxPipeline, Trainer, load_dataset
+from .datasets import load_dataset
+from .flux import FluxPipeline
+from .trainer import Trainer


 def generate_progress_images(iteration, flux, args):
@ -186,6 +187,7 @@ if __name__ == "__main__":
    optimizer = optim.Adam(learning_rate=lr_schedule)
    state = [flux.flow.state, optimizer.state, mx.random.state]

+
    @partial(mx.compile, inputs=state, outputs=state)
    def single_step(x, t5_feat, clip_feat, guidance):
        loss, grads = nn.value_and_grad(flux.flow, flux.training_loss)(
@ -196,12 +198,14 @@ if __name__ == "__main__":

        return loss

+
    @partial(mx.compile, inputs=state, outputs=state)
    def compute_loss_and_grads(x, t5_feat, clip_feat, guidance):
        return nn.value_and_grad(flux.flow, flux.training_loss)(
            x, t5_feat, clip_feat, guidance
        )

+
    @partial(mx.compile, inputs=state, outputs=state)
    def compute_loss_and_accumulate_grads(x, t5_feat, clip_feat, guidance, prev_grads):
        loss, grads = nn.value_and_grad(flux.flow, flux.training_loss)(
@ -210,6 +214,7 @@ if __name__ == "__main__":
        grads = tree_map(lambda a, b: a + b, prev_grads, grads)
        return loss, grads

+
    @partial(mx.compile, inputs=state, outputs=state)
    def grad_accumulate_and_step(x, t5_feat, clip_feat, guidance, prev_grads):
        loss, grads = nn.value_and_grad(flux.flow, flux.training_loss)(
@ -225,6 +230,7 @@ if __name__ == "__main__":

        return loss

+
    # We simply route to the appropriate step based on whether we have
    # gradients from a previous step and whether we should be performing an
    # update or simply computing and accumulating gradients in this step.
@ -247,6 +253,7 @@ if __name__ == "__main__":
                    x, t5_feat, clip_feat, guidance, prev_grads
                )

+
    dataset = load_dataset(args.dataset)
    trainer = Trainer(flux, dataset, args)
    trainer.encode_dataset()
@ -266,7 +273,7 @@ if __name__ == "__main__":

        if (i + 1) % 10 == 0:
            toc = time.time()
-            peak_mem = mx.metal.get_peak_memory() / 1024**3
+            peak_mem = mx.metal.get_peak_memory() / 1024 ** 3
            print(
                f"Iter: {i + 1} Loss: {sum(losses) / 10:.3f} "
                f"It/s: {10 / (toc - tic):.3f} "
--- a/flux/mlx_flux/txt2image.py
+++ b/flux/mlx_flux/txt2image.py
@ -1,14 +1,13 @@
 # Copyright © 2024 Apple Inc.

 import argparse
-
 import mlx.core as mx
 import mlx.nn as nn
 import numpy as np
 from PIL import Image
 from tqdm import tqdm

-from mlx_flux import FluxPipeline
+from .flux import FluxPipeline


 def to_latent_size(image_size):
@ -39,7 +38,7 @@ def load_adapter(flux, adapter_file, fuse=False):
        flux.fuse_lora_layers()


-if __name__ == "__main__":
+def build_parser():
    parser = argparse.ArgumentParser(
        description="Generate images from a textual prompt using stable diffusion"
    )
@ -62,7 +61,11 @@ if __name__ == "__main__":
    parser.add_argument("--adapter")
    parser.add_argument("--fuse-adapter", action="store_true")
    parser.add_argument("--no-t5-padding", dest="t5_padding", action="store_false")
-    args = parser.parse_args()
+    return parser
+
+
+def main():
+    args = build_parser().parse_args()

    # Load the models
    flux = FluxPipeline("flux-" + args.model, t5_padding=args.t5_padding)
@ -93,7 +96,7 @@ if __name__ == "__main__":
    # First we get and eval the conditioning
    conditioning = next(latents)
    mx.eval(conditioning)
-    peak_mem_conditioning = mx.metal.get_peak_memory() / 1024**3
+    peak_mem_conditioning = mx.metal.get_peak_memory() / 1024 ** 3
    mx.metal.reset_peak_memory()

    # The following is not necessary but it may help in memory constrained
@ -108,15 +111,15 @@ if __name__ == "__main__":
    # The following is not necessary but it may help in memory constrained
    # systems by reusing the memory kept by the flow transformer.
    del flux.flow
-    peak_mem_generation = mx.metal.get_peak_memory() / 1024**3
+    peak_mem_generation = mx.metal.get_peak_memory() / 1024 ** 3
    mx.metal.reset_peak_memory()

    # Decode them into images
    decoded = []
    for i in tqdm(range(0, args.n_images, args.decoding_batch_size)):
-        decoded.append(flux.decode(x_t[i : i + args.decoding_batch_size], latent_size))
+        decoded.append(flux.decode(x_t[i: i + args.decoding_batch_size], latent_size))
        mx.eval(decoded[-1])
-    peak_mem_decoding = mx.metal.get_peak_memory() / 1024**3
+    peak_mem_decoding = mx.metal.get_peak_memory() / 1024 ** 3
    peak_mem_overall = max(
        peak_mem_conditioning, peak_mem_generation, peak_mem_decoding
    )
@ -148,3 +151,7 @@ if __name__ == "__main__":
        print(f"Peak memory used for the generation: {peak_mem_generation:.3f}GB")
        print(f"Peak memory used for the decoding:   {peak_mem_decoding:.3f}GB")
        print(f"Peak memory used overall:            {peak_mem_overall:.3f}GB")
+
+
+if __name__ == "__main__":
+    main()