diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 46e241d817b5..a95949f33a39 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -114,6 +114,8 @@ title: Guiders - local: modular_diffusers/custom_blocks title: Building Custom Blocks + - local: modular_diffusers/mellon + title: Mellon Guide title: Modular Diffusers - isExpanded: false sections: diff --git a/docs/source/en/modular_diffusers/mellon.md b/docs/source/en/modular_diffusers/mellon.md new file mode 100644 index 000000000000..c684f86df94e --- /dev/null +++ b/docs/source/en/modular_diffusers/mellon.md @@ -0,0 +1,233 @@ + + + +## Using Custom Blocks with Mellon + +[Mellon](https://github.com/cubiq/Mellon) is a visual workflow interface (similar to ComfyUI) that integrates with Modular Diffusers. This guide shows how to add Mellon support to your custom blocks so they can be used in the Mellon UI. + +## Overview + +To use a custom block in Mellon, you need a `mellon_pipeline_config.json` file that defines how your block's parameters map to Mellon UI components. Here's how to create one: + +1. **Add a "Mellon type" to your block's parameters** - Each `InputParam`/`OutputParam` needs a type that tells Mellon what UI component to render (e.g., `"textbox"`, `"dropdown"`, `"image"`). You can specify types via metadata in your block definitions, or pass them when generating the config. +2. **Generate `mellon_pipeline_config.json`** - Use our utility to generate a default template and push it to your Hub repository +3. **(Optional) Manually adjust the template** - Fine-tune the generated config for your specific needs + +## Step 1: Specify Mellon Types for Parameters + +Mellon types determine how each parameter renders in the UI. If you don't specify a type for a parameter, it will default to `"custom"`, which renders as a simple connection dot. You can always adjust this later in the generated config. 
+ +### Supported Mellon Types + +| Type | Input/Output | Description | +|------|--------------|-------------| +| `image` | Both | Image (PIL Image) | +| `video` | Both | Video | +| `text` | Both | Text display | +| `textbox` | Input | Text input | +| `dropdown` | Input | Dropdown selection menu | +| `slider` | Input | Slider for numeric values | +| `number` | Input | Numeric input | +| `checkbox` | Input | Boolean toggle | + +### Method 1: Using `metadata` in Block Definitions + +If you're defining a custom block from scratch, you can add `metadata={"mellon": "<type>"}` (where `<type>` is one of the Mellon types above) directly to your `InputParam` and `OutputParam` definitions: +```python +class GeminiPromptExpander(ModularPipelineBlocks): + + @property + def inputs(self) -> List[InputParam]: + return [ + InputParam( + "prompt", + type_hint=str, + required=True, + description="Prompt to use", + metadata={"mellon": "textbox"}, # Text input + ) + ] + + @property + def intermediate_outputs(self) -> List[OutputParam]: + return [ + OutputParam( + "prompt", + type_hint=str, + description="Expanded prompt by the LLM", + metadata={"mellon": "text"}, # Text output + ), + OutputParam( + "old_prompt", + type_hint=str, + description="Old prompt provided by the user", + # No metadata - we don't want to render this in UI + ) + ] +``` + +### Method 2: Using `input_types` and `output_types` When Generating Config + +If you're working with an existing pipeline or prefer to keep your block definitions clean, you can specify types when generating the config using the `input_types` and `output_types` arguments: +```python +from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig + +mellon_config = MellonPipelineConfig.from_custom_block( + blocks, + input_types={"prompt": "textbox"}, + output_types={"prompt": "text"} +) +``` + +> [!NOTE] +> If you specify both `metadata` and `input_types`/`output_types`, the arguments take precedence, allowing you to override metadata when needed. 
+ +## Step 2: Generate and Push the Mellon Config + +After specifying Mellon types for your block's parameters, generate the default Mellon configuration template and push it to the Hub: + +```python +from diffusers import ModularPipelineBlocks +from diffusers.modular_pipelines.mellon_node_utils import MellonPipelineConfig + +# Load your custom blocks from your local dir +blocks = ModularPipelineBlocks.from_pretrained("/path/local/folder", trust_remote_code=True) + +# Generate the default config template +mellon_config = MellonPipelineConfig.from_custom_block(blocks) +# Push the default template to `repo_id`. Pass the same local folder path, because the config is saved locally first. +mellon_config.save( + local_dir="/path/local/folder", + repo_id=repo_id, + push_to_hub=True +) +``` + +This creates a `mellon_pipeline_config.json` file in your repository. + +## Step 3: Review and Adjust the Config (Optional) + +The generated template is a starting point - you may want to adjust it for your needs. Let's walk through the generated config for the Gemini Prompt Expander: + +```json +{ + "label": "Gemini Prompt Expander", + "default_repo": "", + "default_dtype": "", + "node_params": { + "custom": { + "params": { + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "" + }, + "out_prompt": { + "label": "Prompt", + "type": "string", + "display": "output" + }, + "old_prompt": { + "label": "Old Prompt", + "type": "custom", + "display": "output" + }, + "doc": { + "label": "Doc", + "type": "string", + "display": "output" + } + }, + "input_names": ["prompt"], + "model_input_names": [], + "output_names": ["out_prompt", "old_prompt", "doc"], + "block_name": "custom", + "node_type": "custom" + } + } +} +``` + +### Understanding the Structure + +The `params` dict defines how each UI element renders. 
The `input_names`, `model_input_names`, and `output_names` lists map these UI elements to the underlying [`ModularPipelineBlocks`]'s I/O interface: + +| Mellon Config | ModularPipelineBlocks | +|---------------|----------------------| +| `input_names` | `inputs` property | +| `model_input_names` | `expected_components` property | +| `output_names` | `intermediate_outputs` property | + +In this example: `prompt` is the only input, there are no model components, and outputs include `out_prompt`, `old_prompt`, and `doc`. + +Now let's look at the `params` dict: + +**`prompt`** is an input parameter. It has `display: "textarea"` which renders as a text input box, `label: "Prompt"` shown in the UI, and `default: ""` so it starts empty. The `type: "string"` field is important in Mellon because it determines which nodes can connect together - only matching types can be linked with "noodles". + +**`out_prompt`** is the expanded prompt output. The `out_` prefix was automatically added because the input and output share the same name (`prompt`), avoiding naming conflicts in the config. It has `display: "output"` which renders as an output socket. + +**`old_prompt`** has `type: "custom"` because we didn't specify metadata. This renders as a simple dot in the UI. Since we don't actually want to expose this in the UI, we can remove it. + +**`doc`** is the documentation output, automatically added to all custom blocks. + +### Making Adjustments + +For the Gemini Prompt Expander, we don't need `old_prompt` in the UI. 
Remove it from both `params` and `output_names`: + +```json +{ + "label": "Gemini Prompt Expander", + "default_repo": "", + "default_dtype": "", + "node_params": { + "custom": { + "params": { + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "" + }, + "out_prompt": { + "label": "Prompt", + "type": "string", + "display": "output" + }, + "doc": { + "label": "Doc", + "type": "string", + "display": "output" + } + }, + "input_names": ["prompt"], + "model_input_names": [], + "output_names": ["out_prompt", "doc"], + "block_name": "custom", + "node_type": "custom" + } + } +} +``` + +See the final config at [YiYiXu/gemini-prompt-expander](https://huggingface.co/YiYiXu/gemini-prompt-expander). + +## Use in Mellon + +1. Start Mellon (see [Mellon installation guide](https://github.com/cubiq/Mellon)) + +2. In Mellon: + - Drag a **Dynamic Block Node** from the ModularDiffusers section + - Enter your `repo_id` (e.g., `YiYiXu/gemini-prompt-expander`) + - Click **Load Custom Block** + - The node will transform to show your block's inputs and outputs \ No newline at end of file diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index e16abb382313..acc50ccce82c 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -324,6 +324,7 @@ class ComponentsManager: "has_hook", "execution_device", "ip_adapter", + "quantization", ] def __init__(self): @@ -356,7 +357,9 @@ def _lookup_ids( ids_by_name.add(component_id) else: ids_by_name = set(components.keys()) - if collection: + if collection and collection not in self.collections: + return set() + elif collection and collection in self.collections: ids_by_collection = set() for component_id, component in components.items(): if component_id in self.collections[collection]: @@ -760,7 +763,6 @@ def disable_auto_cpu_offload(self): self.model_hooks = None 
self._auto_offload_enabled = False - # YiYi TODO: (1) add quantization info def get_model_info( self, component_id: str, @@ -836,6 +838,14 @@ def get_model_info( if scales: info["ip_adapter"] = summarize_dict_by_value_and_parts(scales) + # Check for quantization + hf_quantizer = getattr(component, "hf_quantizer", None) + if hf_quantizer is not None: + quant_config = hf_quantizer.quantization_config + info["quantization"] = quant_config.to_dict() + else: + info["quantization"] = None + # If fields specified, filter info if fields is not None: return {k: v for k, v in info.items() if k in fields} @@ -966,12 +976,14 @@ def format_device(component, info): output += "\nAdditional Component Info:\n" + "=" * 50 + "\n" for name in self.components: info = self.get_model_info(name) - if info is not None and (info.get("adapters") is not None or info.get("ip_adapter")): + if info is not None and (info.get("adapters") is not None or info.get("ip_adapter") or info.get("quantization")): output += f"\n{name}:\n" if info.get("adapters") is not None: output += f" Adapters: {info['adapters']}\n" if info.get("ip_adapter"): output += " IP-Adapter: Enabled\n" + if info.get("quantization"): + output += f" Quantization: {info['quantization']}\n" return output diff --git a/src/diffusers/modular_pipelines/mellon_node_utils.py b/src/diffusers/modular_pipelines/mellon_node_utils.py index f848afe9a3ae..0a4c30cc5c88 100644 --- a/src/diffusers/modular_pipelines/mellon_node_utils.py +++ b/src/diffusers/modular_pipelines/mellon_node_utils.py @@ -1,3 +1,4 @@ +import copy import json import logging import os @@ -6,7 +7,7 @@ from dataclasses import asdict, dataclass from typing import Any, Dict, List, Optional, Union -from huggingface_hub import create_repo, hf_hub_download, upload_folder +from huggingface_hub import create_repo, hf_hub_download, upload_file from huggingface_hub.utils import ( EntryNotFoundError, HfHubHTTPError, @@ -15,25 +16,256 @@ ) from ..utils import 
HUGGINGFACE_CO_RESOLVE_ENDPOINT +from .modular_pipeline_utils import InputParam, OutputParam logger = logging.getLogger(__name__) +def _name_to_label(name: str) -> str: + """Convert snake_case name to Title Case label.""" + return name.replace("_", " ").title() + + +# Template definitions for standard diffuser pipeline parameters +MELLON_PARAM_TEMPLATES = { + # Image I/O + "image": {"label": "Image", "type": "image", "display": "input", "required_block_params": ["image"]}, + "images": {"label": "Images", "type": "image", "display": "output", "required_block_params": ["images"]}, + "control_image": { + "label": "Control Image", + "type": "image", + "display": "input", + "required_block_params": ["control_image"], + }, + # Latents + "latents": {"label": "Latents", "type": "latents", "display": "input", "required_block_params": ["latents"]}, + "image_latents": { + "label": "Image Latents", + "type": "latents", + "display": "input", + "required_block_params": ["image_latents"], + }, + "first_frame_latents": { + "label": "First Frame Latents", + "type": "latents", + "display": "input", + "required_block_params": ["first_frame_latents"], + }, + "latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"}, + # Image Latents with Strength + "image_latents_with_strength": { + "name": "image_latents", # name is not same as template key + "label": "Image Latents", + "type": "latents", + "display": "input", + "onChange": {"false": ["height", "width"], "true": ["strength"]}, + "required_block_params": ["image_latents", "strength"], + }, + # Embeddings + "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"}, + "image_embeds": { + "label": "Image Embeddings", + "type": "image_embeds", + "display": "output", + "required_block_params": ["image_embeds"], + }, + # Text inputs + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "", + "required_block_params": ["prompt"], + }, + 
"negative_prompt": { + "label": "Negative Prompt", + "type": "string", + "display": "textarea", + "default": "", + "required_block_params": ["negative_prompt"], + }, + # Numeric params + "guidance_scale": { + "label": "Guidance Scale", + "type": "float", + "display": "slider", + "default": 5.0, + "min": 1.0, + "max": 30.0, + "step": 0.1, + }, + "strength": { + "label": "Strength", + "type": "float", + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["strength"], + }, + "height": { + "label": "Height", + "type": "int", + "default": 1024, + "min": 64, + "step": 8, + "required_block_params": ["height"], + }, + "width": { + "label": "Width", + "type": "int", + "default": 1024, + "min": 64, + "step": 8, + "required_block_params": ["width"], + }, + "seed": { + "label": "Seed", + "type": "int", + "default": 0, + "min": 0, + "max": 4294967295, + "display": "random", + "required_block_params": ["generator"], + }, + "num_inference_steps": { + "label": "Steps", + "type": "int", + "default": 25, + "min": 1, + "max": 100, + "display": "slider", + "required_block_params": ["num_inference_steps"], + }, + "num_frames": { + "label": "Frames", + "type": "int", + "default": 81, + "min": 1, + "max": 480, + "display": "slider", + "required_block_params": ["num_frames"], + }, + "layers": { + "label": "Layers", + "type": "int", + "default": 4, + "min": 1, + "max": 10, + "display": "slider", + "required_block_params": ["layers"], + }, + # ControlNet + "controlnet_conditioning_scale": { + "label": "Controlnet Conditioning Scale", + "type": "float", + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["controlnet_conditioning_scale"], + }, + "control_guidance_start": { + "label": "Control Guidance Start", + "type": "float", + "default": 0.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["control_guidance_start"], + }, + "control_guidance_end": { + "label": "Control Guidance End", + "type": 
"float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["control_guidance_end"], + }, + # Video + "videos": {"label": "Videos", "type": "video", "display": "output", "required_block_params": ["videos"]}, + # Models + "vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["vae"]}, + "image_encoder": { + "label": "Image Encoder", + "type": "diffusers_auto_model", + "display": "input", + "required_block_params": ["image_encoder"], + }, + "unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"}, + "scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": "input"}, + "controlnet": { + "label": "ControlNet Model", + "type": "diffusers_auto_model", + "display": "input", + "required_block_params": ["controlnet"], + }, + "text_encoders": { + "label": "Text Encoders", + "type": "diffusers_auto_models", + "display": "input", + "required_block_params": ["text_encoder"], + }, + # Bundles/Custom + "controlnet_bundle": { + "label": "ControlNet", + "type": "custom_controlnet", + "display": "input", + "required_block_params": "controlnet_image", + }, + "ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"}, + "guider": { + "label": "Guider", + "type": "custom_guider", + "display": "input", + "onChange": {False: ["guidance_scale"], True: []}, + }, + "doc": {"label": "Doc", "type": "string", "display": "output"}, +} + + +class MellonParamMeta(type): + """Metaclass that enables MellonParam.template_name(**overrides) syntax.""" + + def __getattr__(cls, name: str): + if name in MELLON_PARAM_TEMPLATES: + + def factory(default=None, **overrides): + template = MELLON_PARAM_TEMPLATES[name] + # Use template's name if specified, otherwise use the key + params = {"name": template.get("name", name), **template, **overrides} + if default is not None: + params["default"] = default + return cls(**params) + + return factory + + 
raise AttributeError(f"type object 'MellonParam' has no attribute '{name}'") + + @dataclass(frozen=True) -class MellonParam: +class MellonParam(metaclass=MellonParamMeta): """ Parameter definition for Mellon nodes. - Use factory methods for common params (e.g., MellonParam.seed()) or create custom ones with - MellonParam(name="...", label="...", type="..."). - - Example: + Usage: ```python - # Custom param - MellonParam(name="my_param", label="My Param", type="float", default=0.5) - # Output in Mellon node definition: - # "my_param": {"label": "My Param", "type": "float", "default": 0.5} + # From template (standard diffuser params) + MellonParam.seed() + MellonParam.prompt(default="a cat") + MellonParam.latents(display="output") + + # Generic inputs (for custom blocks) + MellonParam.Input.slider("my_scale", default=1.0, min=0.0, max=2.0) + MellonParam.Input.dropdown("mode", options=["fast", "slow"]) + + # Generic outputs + MellonParam.Output.image("result_images") + + # Fully custom + MellonParam(name="custom", label="Custom", type="float", default=0.5) ``` """ @@ -53,577 +285,198 @@ class MellonParam: required_block_params: Optional[Union[str, List[str]]] = None def to_dict(self) -> Dict[str, Any]: - """Convert to dict for Mellon schema, excluding None values and name.""" + """Convert to dict for Mellon schema, excluding None values and internal fields.""" data = asdict(self) return {k: v for k, v in data.items() if v is not None and k not in ("name", "required_block_params")} - @classmethod - def image(cls) -> "MellonParam": - """ - Image input parameter. - - Mellon node definition: - "image": {"label": "Image", "type": "image", "display": "input"} - """ - return cls(name="image", label="Image", type="image", display="input", required_block_params=["image"]) - - @classmethod - def images(cls) -> "MellonParam": - """ - Images output parameter. 
- - Mellon node definition: - "images": {"label": "Images", "type": "image", "display": "output"} - """ - return cls(name="images", label="Images", type="image", display="output", required_block_params=["images"]) - - @classmethod - def control_image(cls, display: str = "input") -> "MellonParam": - """ - Control image parameter for ControlNet. - - Mellon node definition (display="input"): - "control_image": {"label": "Control Image", "type": "image", "display": "input"} - """ - return cls( - name="control_image", - label="Control Image", - type="image", - display=display, - required_block_params=["control_image"], - ) - - @classmethod - def latents(cls, display: str = "input") -> "MellonParam": - """ - Latents parameter. - - Mellon node definition (display="input"): - "latents": {"label": "Latents", "type": "latents", "display": "input"} - - Mellon node definition (display="output"): - "latents": {"label": "Latents", "type": "latents", "display": "output"} - """ - return cls(name="latents", label="Latents", type="latents", display=display, required_block_params=["latents"]) - - @classmethod - def image_latents(cls, display: str = "input") -> "MellonParam": - """ - Image latents parameter for img2img workflows. - - Mellon node definition (display="input"): - "image_latents": {"label": "Image Latents", "type": "latents", "display": "input"} - """ - return cls( - name="image_latents", - label="Image Latents", - type="latents", - display=display, - required_block_params=["image_latents"], - ) - - @classmethod - def first_frame_latents(cls, display: str = "input") -> "MellonParam": - """ - First frame latents for video generation. 
- - Mellon node definition (display="input"): - "first_frame_latents": {"label": "First Frame Latents", "type": "latents", "display": "input"} - """ - return cls( - name="first_frame_latents", - label="First Frame Latents", - type="latents", - display=display, - required_block_params=["first_frame_latents"], - ) - - @classmethod - def image_latents_with_strength(cls) -> "MellonParam": - """ - Image latents with strength-based onChange behavior. When connected, shows strength slider; when disconnected, - shows height/width. - - Mellon node definition: - "image_latents": { - "label": "Image Latents", "type": "latents", "display": "input", "onChange": {"false": ["height", - "width"], "true": ["strength"]} - } - """ - return cls( - name="image_latents", - label="Image Latents", - type="latents", - display="input", - onChange={"false": ["height", "width"], "true": ["strength"]}, - required_block_params=["image_latents", "strength"], - ) - - @classmethod - def latents_preview(cls) -> "MellonParam": - """ - Latents preview output for visualizing latents in the UI. - - Mellon node definition: - "latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"} - """ - return cls(name="latents_preview", label="Latents Preview", type="latent", display="output") - - @classmethod - def embeddings(cls, display: str = "output") -> "MellonParam": - """ - Text embeddings parameter. - - Mellon node definition (display="output"): - "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"} - - Mellon node definition (display="input"): - "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "input"} - """ - return cls(name="embeddings", label="Text Embeddings", type="embeddings", display=display) - - @classmethod - def image_embeds(cls, display: str = "output") -> "MellonParam": - """ - Image embeddings parameter for IP-Adapter workflows. 
- - Mellon node definition (display="output"): - "image_embeds": {"label": "Image Embeddings", "type": "image_embeds", "display": "output"} - """ - return cls( - name="image_embeds", - label="Image Embeddings", - type="image_embeds", - display=display, - required_block_params=["image_embeds"], - ) - - @classmethod - def controlnet_conditioning_scale(cls, default: float = 0.5) -> "MellonParam": - """ - ControlNet conditioning scale slider. - - Mellon node definition (default=0.5): - "controlnet_conditioning_scale": { - "label": "Controlnet Conditioning Scale", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, - "step": 0.01 - } - """ - return cls( - name="controlnet_conditioning_scale", - label="Controlnet Conditioning Scale", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["controlnet_conditioning_scale"], - ) - - @classmethod - def control_guidance_start(cls, default: float = 0.0) -> "MellonParam": - """ - Control guidance start timestep. - - Mellon node definition (default=0.0): - "control_guidance_start": { - "label": "Control Guidance Start", "type": "float", "default": 0.0, "min": 0.0, "max": 1.0, "step": - 0.01 - } - """ - return cls( - name="control_guidance_start", - label="Control Guidance Start", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["control_guidance_start"], - ) - - @classmethod - def control_guidance_end(cls, default: float = 1.0) -> "MellonParam": - """ - Control guidance end timestep. 
- - Mellon node definition (default=1.0): - "control_guidance_end": { - "label": "Control Guidance End", "type": "float", "default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01 - } - """ - return cls( - name="control_guidance_end", - label="Control Guidance End", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["control_guidance_end"], - ) - - @classmethod - def prompt(cls, default: str = "") -> "MellonParam": - """ - Text prompt input as textarea. - - Mellon node definition (default=""): - "prompt": {"label": "Prompt", "type": "string", "default": "", "display": "textarea"} - """ - return cls( - name="prompt", - label="Prompt", - type="string", - default=default, - display="textarea", - required_block_params=["prompt"], - ) - - @classmethod - def negative_prompt(cls, default: str = "") -> "MellonParam": - """ - Negative prompt input as textarea. - - Mellon node definition (default=""): - "negative_prompt": {"label": "Negative Prompt", "type": "string", "default": "", "display": "textarea"} - """ - return cls( - name="negative_prompt", - label="Negative Prompt", - type="string", - default=default, - display="textarea", - required_block_params=["negative_prompt"], - ) - - @classmethod - def strength(cls, default: float = 0.5) -> "MellonParam": - """ - Denoising strength for img2img. - - Mellon node definition (default=0.5): - "strength": {"label": "Strength", "type": "float", "default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01} - """ - return cls( - name="strength", - label="Strength", - type="float", - default=default, - min=0.0, - max=1.0, - step=0.01, - required_block_params=["strength"], - ) - - @classmethod - def guidance_scale(cls, default: float = 5.0) -> "MellonParam": - """ - CFG guidance scale slider. 
- - Mellon node definition (default=5.0): - "guidance_scale": { - "label": "Guidance Scale", "type": "float", "display": "slider", "default": 5.0, "min": 1.0, "max": - 30.0, "step": 0.1 - } - """ - return cls( - name="guidance_scale", - label="Guidance Scale", - type="float", - display="slider", - default=default, - min=1.0, - max=30.0, - step=0.1, - ) - - @classmethod - def height(cls, default: int = 1024) -> "MellonParam": - """ - Image height in pixels. - - Mellon node definition (default=1024): - "height": {"label": "Height", "type": "int", "default": 1024, "min": 64, "step": 8} - """ - return cls( - name="height", - label="Height", - type="int", - default=default, - min=64, - step=8, - required_block_params=["height"], - ) - - @classmethod - def width(cls, default: int = 1024) -> "MellonParam": - """ - Image width in pixels. - - Mellon node definition (default=1024): - "width": {"label": "Width", "type": "int", "default": 1024, "min": 64, "step": 8} - """ - return cls( - name="width", label="Width", type="int", default=default, min=64, step=8, required_block_params=["width"] - ) - - @classmethod - def seed(cls, default: int = 0) -> "MellonParam": - """ - Random seed with randomize button. - - Mellon node definition (default=0): - "seed": { - "label": "Seed", "type": "int", "default": 0, "min": 0, "max": 4294967295, "display": "random" - } - """ - return cls( - name="seed", - label="Seed", - type="int", - default=default, - min=0, - max=4294967295, - display="random", - required_block_params=["generator"], - ) - - @classmethod - def num_inference_steps(cls, default: int = 25) -> "MellonParam": - """ - Number of denoising steps slider. 
- - Mellon node definition (default=25): - "num_inference_steps": { - "label": "Steps", "type": "int", "default": 25, "min": 1, "max": 100, "display": "slider" - } - """ - return cls( - name="num_inference_steps", - label="Steps", - type="int", - default=default, - min=1, - max=100, - display="slider", - required_block_params=["num_inference_steps"], - ) - - @classmethod - def num_frames(cls, default: int = 81) -> "MellonParam": - """ - Number of video frames slider. - - Mellon node definition (default=81): - "num_frames": {"label": "Frames", "type": "int", "default": 81, "min": 1, "max": 480, "display": "slider"} - """ - return cls( - name="num_frames", - label="Frames", - type="int", - default=default, - min=1, - max=480, - display="slider", - required_block_params=["num_frames"], - ) - - @classmethod - def layers(cls, default: int = 4) -> "MellonParam": - """ - Number of layers slider (for layered diffusion). - - Mellon node definition (default=4): - "layers": {"label": "Layers", "type": "int", "default": 4, "min": 1, "max": 10, "display": "slider"} - """ - return cls( - name="layers", - label="Layers", - type="int", - default=default, - min=1, - max=10, - display="slider", - required_block_params=["layers"], - ) - - @classmethod - def videos(cls) -> "MellonParam": - """ - Video output parameter. - - Mellon node definition: - "videos": {"label": "Videos", "type": "video", "display": "output"} - """ - return cls(name="videos", label="Videos", type="video", display="output", required_block_params=["videos"]) - - @classmethod - def vae(cls) -> "MellonParam": - """ - VAE model input. - - Mellon node definition: - "vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use - components.get_one(model_id) to retrieve the actual model. 
- """ - return cls( - name="vae", label="VAE", type="diffusers_auto_model", display="input", required_block_params=["vae"] - ) - - @classmethod - def image_encoder(cls) -> "MellonParam": - """ - Image encoder model input. - - Mellon node definition: - "image_encoder": {"label": "Image Encoder", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls( - name="image_encoder", - label="Image Encoder", - type="diffusers_auto_model", - display="input", - required_block_params=["image_encoder"], - ) - - @classmethod - def unet(cls) -> "MellonParam": - """ - Denoising model (UNet/Transformer) input. - - Mellon node definition: - "unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls(name="unet", label="Denoise Model", type="diffusers_auto_model", display="input") - - @classmethod - def scheduler(cls) -> "MellonParam": - """ - Scheduler model input. - - Mellon node definition: - "scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id'. Use - components.get_one(model_id) to retrieve the actual scheduler. - """ - return cls(name="scheduler", label="Scheduler", type="diffusers_auto_model", display="input") - - @classmethod - def controlnet(cls) -> "MellonParam": - """ - ControlNet model input. - - Mellon node definition: - "controlnet": {"label": "ControlNet Model", "type": "diffusers_auto_model", "display": "input"} - - Note: The value received is a model info dict with keys like 'model_id', 'repo_id', 'execution_device'. 
Use - components.get_one(model_id) to retrieve the actual model. - """ - return cls( - name="controlnet", - label="ControlNet Model", - type="diffusers_auto_model", - display="input", - required_block_params=["controlnet"], - ) - - @classmethod - def text_encoders(cls) -> "MellonParam": - """ - Text encoders dict input (multiple encoders). - - Mellon node definition: - "text_encoders": {"label": "Text Encoders", "type": "diffusers_auto_models", "display": "input"} - - Note: The value received is a dict of model info dicts: - { - 'text_encoder': {'model_id': ..., 'execution_device': ..., ...}, 'tokenizer': {'model_id': ..., ...}, - 'repo_id': '...' - } - Use components.get_one(model_id) to retrieve each model. - """ - return cls( - name="text_encoders", - label="Text Encoders", - type="diffusers_auto_models", - display="input", - required_block_params=["text_encoder"], - ) - - @classmethod - def controlnet_bundle(cls, display: str = "input") -> "MellonParam": - """ - ControlNet bundle containing model and processed control inputs. Output from ControlNet node, input to Denoise - node. 
- - Mellon node definition (display="input"): - "controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "input"} + # ========================================================================= + # Input: Generic input parameter factories (for custom blocks) + # ========================================================================= + class Input: + """input UI elements for custom blocks.""" + + @classmethod + def image(cls, name: str) -> "MellonParam": + """image input.""" + return MellonParam(name=name, label=_name_to_label(name), type="image", display="input") + + @classmethod + def textbox(cls, name: str, default: str = "") -> "MellonParam": + """text input as textarea.""" + return MellonParam( + name=name, label=_name_to_label(name), type="string", display="textarea", default=default + ) - Mellon node definition (display="output"): - "controlnet_bundle": {"label": "ControlNet", "type": "custom_controlnet", "display": "output"} + @classmethod + def dropdown(cls, name: str, options: List[str] = None, default: str = None) -> "MellonParam": + """dropdown selection.""" + if options and not default: + default = options[0] + if not default: + default = "" + if not options: + options = [default] + return MellonParam(name=name, label=_name_to_label(name), type="string", options=options, value=default) + + @classmethod + def slider( + cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None + ) -> "MellonParam": + """slider input.""" + is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) + param_type = "float" if is_float else "int" + if min is None: + min = default + if max is None: + max = default + if step is None: + step = 0.01 if is_float else 1 + return MellonParam( + name=name, + label=_name_to_label(name), + type=param_type, + display="slider", + default=default, + min=min, + max=max, + step=step, + ) - Note: The value is a dict containing: - { - 'controlnet': 
{'model_id': ..., ...}, # controlnet model info 'control_image': ..., # processed control - image/embeddings 'controlnet_conditioning_scale': ..., # and other denoise block inputs - } - """ - return cls( - name="controlnet_bundle", - label="ControlNet", - type="custom_controlnet", - display=display, - required_block_params="controlnet_image", - ) + @classmethod + def number( + cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None + ) -> "MellonParam": + """number input (no slider).""" + is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) + param_type = "float" if is_float else "int" + return MellonParam( + name=name, label=_name_to_label(name), type=param_type, default=default, min=min, max=max, step=step + ) - @classmethod - def ip_adapter(cls) -> "MellonParam": - """ - IP-Adapter input. + @classmethod + def seed(cls, name: str = "seed", default: int = 0) -> "MellonParam": + """seed input with randomize button.""" + return MellonParam( + name=name, + label=_name_to_label(name), + type="int", + display="random", + default=default, + min=0, + max=4294967295, + ) - Mellon node definition: - "ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"} - """ - return cls(name="ip_adapter", label="IP Adapter", type="custom_ip_adapter", display="input") + @classmethod + def checkbox(cls, name: str, default: bool = False) -> "MellonParam": + """boolean checkbox.""" + return MellonParam(name=name, label=_name_to_label(name), type="boolean", default=default) + + @classmethod + def custom_type(cls, name: str, type: str) -> "MellonParam": + """custom type input for node connections.""" + return MellonParam(name=name, label=_name_to_label(name), type=type, display="input") + + @classmethod + def model(cls, name: str) -> "MellonParam": + """model input for diffusers components.""" + return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", 
display="input") + + # ========================================================================= + # Output: Generic output parameter factories (for custom blocks) + # ========================================================================= + class Output: + """output UI elements for custom blocks.""" + + @classmethod + def image(cls, name: str) -> "MellonParam": + """image output.""" + return MellonParam(name=name, label=_name_to_label(name), type="image", display="output") + + @classmethod + def video(cls, name: str) -> "MellonParam": + """video output.""" + return MellonParam(name=name, label=_name_to_label(name), type="video", display="output") + + @classmethod + def text(cls, name: str) -> "MellonParam": + """text output.""" + return MellonParam(name=name, label=_name_to_label(name), type="string", display="output") + + @classmethod + def custom_type(cls, name: str, type: str) -> "MellonParam": + """custom type output for node connections.""" + return MellonParam(name=name, label=_name_to_label(name), type=type, display="output") + + @classmethod + def model(cls, name: str) -> "MellonParam": + """model output for diffusers components.""" + return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", display="output") + + +def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: + """ + Convert an InputParam to a MellonParam using metadata. - @classmethod - def guider(cls) -> "MellonParam": - """ - Custom guider input. When connected, hides the guidance_scale slider. + Args: + input_param: An InputParam with optional metadata={"mellon": "<type>"} where type is one of: + image, video, text, textbox, checkbox, number, slider, dropdown, seed, model. If metadata is None or + unknown, maps to "custom".
- Mellon node definition: - "guider": { - "label": "Guider", "type": "custom_guider", "display": "input", "onChange": {false: ["guidance_scale"], - true: []} - } - """ - return cls( - name="guider", - label="Guider", - type="custom_guider", - display="input", - onChange={False: ["guidance_scale"], True: []}, - ) + Returns: + MellonParam instance + """ + name = input_param.name + metadata = input_param.metadata + mellon_type = metadata.get("mellon") if metadata else None + default = input_param.default + + if mellon_type == "image": + return MellonParam.Input.image(name) + elif mellon_type == "textbox": + return MellonParam.Input.textbox(name, default=default or "") + elif mellon_type == "dropdown": + return MellonParam.Input.dropdown(name, default=default or "") + elif mellon_type == "slider": + return MellonParam.Input.slider(name, default=default or 0) + elif mellon_type == "number": + return MellonParam.Input.number(name, default=default or 0) + elif mellon_type == "seed": + return MellonParam.Input.seed(name, default=default or 0) + elif mellon_type == "checkbox": + return MellonParam.Input.checkbox(name, default=default or False) + elif mellon_type == "model": + return MellonParam.Input.model(name) + else: + # None or unknown -> custom + return MellonParam.Input.custom_type(name, type="custom") + + +def output_param_to_mellon_param(output_param: "OutputParam") -> MellonParam: + """ + Convert an OutputParam to a MellonParam using metadata. - @classmethod - def doc(cls) -> "MellonParam": - """ - Documentation output for inspecting the underlying modular pipeline. + Args: + output_param: An OutputParam with optional metadata={"mellon": "<type>"} where type is one of: + image, video, text, model. If metadata is None or unknown, maps to "custom".
- Mellon node definition: - "doc": {"label": "Doc", "type": "string", "display": "output"} - """ - return cls(name="doc", label="Doc", type="string", display="output") + Returns: + MellonParam instance + """ + name = output_param.name + metadata = output_param.metadata + mellon_type = metadata.get("mellon") if metadata else None + + if mellon_type == "image": + return MellonParam.Output.image(name) + elif mellon_type == "video": + return MellonParam.Output.video(name) + elif mellon_type == "text": + return MellonParam.Output.text(name) + elif mellon_type == "model": + return MellonParam.Output.model(name) + else: + # None or unknown -> custom + return MellonParam.Output.custom_type(name, type="custom") DEFAULT_NODE_SPECS = { @@ -804,10 +657,15 @@ def node_spec_to_mellon_dict(node_spec: Dict[str, Any], node_type: str) -> Dict[ params[p.name] = param_dict model_input_names.append(p.name) - # Process outputs + # Process outputs: add a prefix to the output name if it already exists as an input for p in node_spec.get("outputs", []): - params[p.name] = p.to_dict() - output_names.append(p.name) + if p.name in input_names: + # rename to out_<name> + output_name = f"out_{p.name}" + else: + output_name = p.name + params[output_name] = p.to_dict() + output_names.append(output_name) return { "params": params, @@ -959,7 +817,7 @@ def from_json_file(cls, json_file_path: Union[str, os.PathLike]) -> "MellonPipel return cls.from_dict(data) def save(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): - """Save the pipeline config to a directory.""" + """Save the mellon pipeline config to a directory.""" if os.path.isfile(save_directory): raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") @@ -975,15 +833,14 @@ def save(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = Fals token = kwargs.pop("token", None) repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) repo_id =
create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id - subfolder = kwargs.pop("subfolder", None) - upload_folder( + upload_file( + path_or_fileobj=output_path, + path_in_repo=self.config_name, repo_id=repo_id, - folder_path=save_directory, token=token, commit_message=commit_message or "Upload MellonPipelineConfig", create_pr=create_pr, - path_in_repo=subfolder, ) logger.info(f"Pipeline config pushed to hub: {repo_id}") @@ -1150,3 +1007,83 @@ def filter_spec_for_block(template_spec: Dict[str, Any], block) -> Optional[Dict default_repo=default_repo, default_dtype=default_dtype, ) + + @classmethod + def from_custom_block( + cls, + block, + node_label: str = None, + input_types: Optional[Dict[str, str]] = None, + output_types: Optional[Dict[str, str]] = None, + ) -> "MellonPipelineConfig": + """ + Create a MellonPipelineConfig from a custom block. + + Args: + block: A block instance with `inputs`, `outputs`, and `expected_components`/`component_names` properties. + Each InputParam/OutputParam should have metadata={"mellon": "<type>"} where type is one of: image, + video, text, textbox, checkbox, number, slider, dropdown, seed, model. If metadata is None, maps to "custom". + node_label: The display label for the node. Defaults to block class name with spaces. + input_types: + Optional dict mapping input param names to mellon types. Overrides the block's metadata if provided. + Example: {"prompt": "textbox", "image": "image"} + output_types: + Optional dict mapping output param names to mellon types. Overrides the block's metadata if provided.
+ Example: {"prompt": "text", "images": "image"} + + Returns: + MellonPipelineConfig instance + """ + if node_label is None: + class_name = block.__class__.__name__ + node_label = "".join([" " + c if c.isupper() else c for c in class_name]).strip() + + if input_types is None: + input_types = {} + if output_types is None: + output_types = {} + + inputs = [] + model_inputs = [] + outputs = [] + + # Process block inputs + for input_param in block.inputs: + if input_param.name is None: + continue + if input_param.name in input_types: + input_param = copy.copy(input_param) + input_param.metadata = {"mellon": input_types[input_param.name]} + print(f" processing input: {input_param.name}, metadata: {input_param.metadata}") + inputs.append(input_param_to_mellon_param(input_param)) + + # Process block outputs + for output_param in block.outputs: + if output_param.name is None: + continue + if output_param.name in output_types: + output_param = copy.copy(output_param) + output_param.metadata = {"mellon": output_types[output_param.name]} + outputs.append(output_param_to_mellon_param(output_param)) + + # Process expected components (all map to model inputs) + component_names = block.component_names + for component_name in component_names: + model_inputs.append(MellonParam.Input.model(component_name)) + + # Always add doc output + outputs.append(MellonParam.doc()) + + node_spec = { + "inputs": inputs, + "model_inputs": model_inputs, + "outputs": outputs, + "required_inputs": [], + "required_model_inputs": [], + "block_name": "custom", + } + + return cls( + node_specs={"custom": node_spec}, + label=node_label, + ) diff --git a/src/diffusers/modular_pipelines/modular_pipeline_utils.py b/src/diffusers/modular_pipelines/modular_pipeline_utils.py index f3b12d716160..5481790a9405 100644 --- a/src/diffusers/modular_pipelines/modular_pipeline_utils.py +++ b/src/diffusers/modular_pipelines/modular_pipeline_utils.py @@ -520,6 +520,7 @@ class InputParam: required: bool = False 
description: str = "" kwargs_type: str = None + metadata: Dict[str, Any] = None def __repr__(self): return f"<{self.name}: {'required' if self.required else 'optional'}, default={self.default}>" @@ -553,6 +554,7 @@ class OutputParam: type_hint: Any = None description: str = "" kwargs_type: str = None + metadata: Dict[str, Any] = None def __repr__(self): return (