import torch
from diffusers import ModularPipeline, ComponentsManager

repo_id = "black-forest-labs/FLUX.1-dev"
device = "cuda"

components = ComponentsManager()
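# Auto CPU offload lets the ComponentsManager shuttle models between CPU and
# `device` on demand, so only the components currently in use occupy VRAM.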
components.enable_auto_cpu_offload(device=device)

text_blocks = ModularPipeline.from_pretrained(repo_id, components_manager=components).blocks.sub_blocks.pop("text_encoder")
text_encoder_node = text_blocks.init_pipeline(repo_id, components_manager=components)
text_encoder_node.load_components(torch_dtype=torch.bfloat16)
prompt = "a dog sitting by the sea waiting for its companion to come"
# Text-embedding-related inputs (`max_sequence_length`, for example) must be
# passed to this step, since they are consumed while preparing the text
# embeddings and NOT during denoising.
text_state = text_encoder_node(prompt=prompt, max_sequence_length=512)
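# `denoiser_input_fields` groups the text-encoder outputs that the denoise
# step consumes (prompt embeddings and related tensors); the exact keys are
# printed below.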
text_embeddings = text_state.get_by_kwargs("denoiser_input_fields")

denoise_blocks = ModularPipeline.from_pretrained(repo_id).blocks.sub_blocks.pop("denoise")
denoise_node = denoise_blocks.init_pipeline(repo_id, components_manager=components)
denoise_node.load_components(torch_dtype=torch.bfloat16)

print(f"{text_embeddings.keys()=}")
denoise_state = denoise_node(
    **text_embeddings,
    guidance_scale=4.5,
    num_inference_steps=28,
    generator=torch.Generator(device=device).manual_seed(0),
)
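Running the denoise step then fails inside the compiled transformer with the traceback below. Note that the trace shows the hub SageAttention backend and compiled transformer blocks, neither of which appears in the script above; the missing setup presumably looked something like the following sketch (the attribute access, the backend string, and the per-block compilation are all assumptions inferred from the `_sage_attention_hub` and `_compiled_call_impl` frames in the trace):

# Assumed setup implied by the traceback, NOT part of the original script.
transformer = denoise_node.transformer  # attribute access assumed
transformer.set_attention_backend("_sage_attention_hub")  # backend registry name assumed
for block in transformer.transformer_blocks:  # per-block compile, as the trace suggests
    block.compile()  # torch.nn.Module.compile wraps torch.compile in-place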
Traceback (most recent call last):
  File "/fsx/sayak/diffusers/check_flux_sage.py", line 13, in <module>
    image = pipe(
  File "/fsx/sayak/miniconda3/envs/diffusers/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
    return func(*args, **kwargs)
  File "/fsx/sayak/diffusers/src/diffusers/pipelines/flux/pipeline_flux.py", line 944, in __call__
    noise_pred = self.transformer(
  File "/fsx/sayak/miniconda3/envs/diffusers/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/fsx/sayak/miniconda3/envs/diffusers/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
    return forward_call(*args, **kwargs)
  File "/fsx/sayak/diffusers/src/diffusers/models/transformers/transformer_flux.py", line 739, in forward
    encoder_hidden_states, hidden_states = block(
  File "/fsx/sayak/miniconda3/envs/diffusers/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1771, in _wrapped_call_impl
    return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
  File "/fsx/sayak/miniconda3/envs/diffusers/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 745, in compile_wrapper
    raise e.with_traceback(None) from e.__cause__  # User compiler error
torch._dynamo.exc.Unsupported: torch.* op returned non-Tensor
  Explanation: torch.* ops that return a non-Tensor cannot be traced into the Dynamo FX graph output
  Developer debug context: example_value type: int; op: call_function; target: <function device_count at 0x7f249f6477f0>

from user code:
   File "/fsx/sayak/diffusers/src/diffusers/models/transformers/transformer_flux.py", line 453, in forward
    attention_outputs = self.attn(
   File "/fsx/sayak/diffusers/src/diffusers/models/transformers/transformer_flux.py", line 352, in forward
    return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs)
   File "/fsx/sayak/diffusers/src/diffusers/models/transformers/transformer_flux.py", line 118, in __call__
    hidden_states = dispatch_attention_fn(
   File "/fsx/sayak/diffusers/src/diffusers/models/attention_dispatch.py", line 331, in dispatch_attention_fn
    return backend_fn(**kwargs)
   File "/fsx/sayak/diffusers/src/diffusers/models/attention_dispatch.py", line 1780, in _sage_attention_hub
    out = sage_attn_func_hub(
   File "/fsx/sayak/.cache/models--kernels-community--sage_attention/snapshots/c08f35540cbb059f0f245eb65e2aa072df3549ad/build/torch28-cxx11-cu129-x86_64-linux/sage_attention/core.py", line 120, in sageattn
    arch = get_cuda_arch_versions()[q.device.index]
   File "/fsx/sayak/.cache/models--kernels-community--sage_attention/snapshots/c08f35540cbb059f0f245eb65e2aa072df3549ad/build/torch28-cxx11-cu129-x86_64-linux/sage_attention/core.py", line 50, in get_cuda_arch_versions
    for i in range(torch.cuda.device_count()):

Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"
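The root cause sits in the last two frames: `sageattn` calls `get_cuda_arch_versions()`, which loops over `torch.cuda.device_count()`; that op returns a plain Python `int`, which Dynamo cannot place in the FX graph, and under full-graph compilation the resulting graph break is promoted to a hard error. A workaround sketch, assuming the hub kernel loads via the `kernels` library (as the `models--kernels-community--sage_attention` cache path suggests) and that its package layout matches the traceback:

import torch._dynamo
from kernels import get_kernel

# Exclude the arch lookup from tracing so Dynamo graph-breaks (falls back to
# eager) at this call instead of erroring. The repo id and attribute layout
# are assumptions taken from the cache path in the traceback.
sage = get_kernel("kernels-community/sage_attention")
sage.core.get_cuda_arch_versions = torch._dynamo.disable(sage.core.get_cuda_arch_versions)

# If the blocks were compiled with fullgraph=True, a graph break is still
# fatal; recompiling without it lets Dynamo skip the unsupported call.
for block in transformer.transformer_blocks:
    block.compile(fullgraph=False)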