huggingface · ChinChyi · May 6, 2026
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
@@ -646,6 +646,8 @@
         title: VisualCloze
       - local: api/pipelines/z_image
         title: Z-Image
+      - local: api/pipelines/unillada
+        title: UniLLaDA
       title: Image
     - sections:
       - local: api/pipelines/llada2

diff --git a/docs/source/en/api/pipelines/unillada.md b/docs/source/en/api/pipelines/unillada.md
@@ -0,0 +1,72 @@
+<!--Copyright 2026 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# UniLLaDA
+
+[UniLLaDA](https://huggingface.co/inclusionAI/LLaDA2.0-Uni) is a unified discrete diffusion language model that supports
+text-to-image generation, image understanding, and image editing through block-wise iterative refinement. It extends
+the [LLaDA2](./llada2) framework with multimodal capabilities.
+
+## Usage
+
+UniLLaDA supports three modes (the later examples reuse the `pipe` and `img` objects created in the earlier ones):
+- **Text-to-Image**: Generate images from text prompts.
+- **Image Understanding**: Answer questions about images.
+- **Image Editing**: Edit images based on text instructions.
+
+### Text-to-Image
+
+```py
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from diffusers import BlockRefinementScheduler, UniLLaDaPipeline
+
+model_id = "inclusionAI/LLaDA2.0-Uni"
+model = AutoModelForCausalLM.from_pretrained(
+    model_id, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+scheduler = BlockRefinementScheduler()
+
+pipe = UniLLaDaPipeline(transformer=model, tokenizer=tokenizer, scheduler=scheduler)
+
+result = pipe(prompt="A cat sitting on a windowsill at sunset")
+result.images[0].save("output.png")
+```
+
+### Image Understanding
+
+```py
+from PIL import Image
+
+img = Image.open("photo.jpg")
+result = pipe(image=img, question="Describe this image in detail.")
+print(result.text)
+```
+
+### Image Editing
+
+```py
+result = pipe(image=img, instruction="Change the background to a beach.")
+result.images[0].save("edited.png")
+```
+
+## UniLLaDaPipeline
+
+[[autodoc]] UniLLaDaPipeline
+    - all
+    - __call__
+
+## UniLLaDaPipelineOutput
+
+[[autodoc]] pipelines.UniLLaDaPipelineOutput
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
@@ -303,6 +303,7 @@
             "WanAnimateTransformer3DModel",
             "WanTransformer3DModel",
             "WanVACETransformer3DModel",
+            "LLaDA2UniImageTransformer2DModel",
             "ZImageControlNetModel",
             "ZImageTransformer2DModel",
             "attention_backend",
@@ -380,6 +381,7 @@
             "FlowMatchEulerDiscreteScheduler",
             "FlowMatchHeunDiscreteScheduler",
             "FlowMatchLCMScheduler",
+            "LLaDA2UniFlowMatchEulerScheduler",
             "HeliosDMDScheduler",
             "HeliosScheduler",
             "HeunDiscreteScheduler",
@@ -611,6 +613,8 @@
             "LEditsPPPipelineStableDiffusionXL",
             "LLaDA2Pipeline",
             "LLaDA2PipelineOutput",
+            "UniLLaDaPipeline",
+            "UniLLaDaPipelineOutput",
             "LongCatAudioDiTPipeline",
             "LongCatImageEditPipeline",
             "LongCatImagePipeline",
@@ -1072,6 +1076,7 @@
             Kandinsky3UNet,
             Kandinsky5Transformer3DModel,
             LatteTransformer3DModel,
+            LLaDA2UniImageTransformer2DModel,
             LongCatAudioDiTTransformer,
             LongCatAudioDiTVae,
             LongCatImageTransformer2DModel,
@@ -1201,6 +1206,7 @@
             KDPM2AncestralDiscreteScheduler,
             KDPM2DiscreteScheduler,
             LCMScheduler,
+            LLaDA2UniFlowMatchEulerScheduler,
             LTXEulerAncestralRFScheduler,
             PNDMScheduler,
             RePaintScheduler,
@@ -1529,6 +1535,8 @@
             UniDiffuserModel,
             UniDiffuserPipeline,
             UniDiffuserTextDecoder,
+            UniLLaDaPipeline,
+            UniLLaDaPipelineOutput,
             VersatileDiffusionDualGuidedPipeline,
             VersatileDiffusionImageVariationPipeline,
             VersatileDiffusionPipeline,

diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py
@@ -132,6 +132,7 @@
     _import_structure["transformers.transformer_wan"] = ["WanTransformer3DModel"]
     _import_structure["transformers.transformer_wan_animate"] = ["WanAnimateTransformer3DModel"]
     _import_structure["transformers.transformer_wan_vace"] = ["WanVACETransformer3DModel"]
+    _import_structure["transformers.transformer_llada2uni_image"] = ["LLaDA2UniImageTransformer2DModel"]
     _import_structure["transformers.transformer_z_image"] = ["ZImageTransformer2DModel"]
     _import_structure["unets.unet_1d"] = ["UNet1DModel"]
     _import_structure["unets.unet_2d"] = ["UNet2DModel"]
@@ -238,6 +239,7 @@
             HunyuanVideoTransformer3DModel,
             Kandinsky5Transformer3DModel,
             LatteTransformer3DModel,
+            LLaDA2UniImageTransformer2DModel,
             LongCatAudioDiTTransformer,
             LongCatImageTransformer2DModel,
             LTX2VideoTransformer3DModel,

diff --git a/src/diffusers/models/transformers/__init__.py b/src/diffusers/models/transformers/__init__.py
@@ -37,6 +37,7 @@
     from .transformer_hunyuan_video_framepack import HunyuanVideoFramepackTransformer3DModel
     from .transformer_hunyuanimage import HunyuanImageTransformer2DModel
     from .transformer_kandinsky import Kandinsky5Transformer3DModel
+    from .transformer_llada2uni_image import LLaDA2UniImageTransformer2DModel
     from .transformer_longcat_audio_dit import LongCatAudioDiTTransformer
     from .transformer_longcat_image import LongCatImageTransformer2DModel
     from .transformer_ltx import LTXVideoTransformer3DModel