diff --git a/.github/ci_runners/configs/qwen_image/config.yaml b/.github/ci_runners/configs/qwen_image/config.yaml new file mode 100644 index 000000000000..a28b87609354 --- /dev/null +++ b/.github/ci_runners/configs/qwen_image/config.yaml @@ -0,0 +1,14 @@ +pipeline_class: QwenImagePipeline +module: diffusers.pipelines.qwenimage.pipeline_qwenimage + +params_grid: + - name: "landscape_50steps_single" + width: 1344 + height: 768 + num_inference_steps: 50 + true_cfg_scale: 4.0 + parallel: "single" + +prompt: "A 20-year-old East Asian girl with delicate, charming features and large, bright brown eyes, cheerful expression, wavy long hair tied in twin ponytails, fair skin, light makeup, wearing a modern cute dress in bright soft colors, standing indoors at an anime convention, casual iPhone snapshot style." +negative_prompt: "低分辨率,低画质,肢体畸形,手指畸形,画面过饱和,蜡像感,人脸无细节,过度光滑,画面具有AI感。构图混乱。文字模糊,扭曲。" +reference_config: "landscape_50steps_single" diff --git a/.github/ci_runners/configs/qwen_image/variants/2512.yaml b/.github/ci_runners/configs/qwen_image/variants/2512.yaml new file mode 100644 index 000000000000..e6b51b7932f3 --- /dev/null +++ b/.github/ci_runners/configs/qwen_image/variants/2512.yaml @@ -0,0 +1,4 @@ +model_id: Qwen/Qwen-Image-2512 +weight_path: /home/weights/Qwen-Image-2512 +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/qwen_image_edit/config.yaml b/.github/ci_runners/configs/qwen_image_edit/config.yaml new file mode 100644 index 000000000000..3e882a2ef294 --- /dev/null +++ b/.github/ci_runners/configs/qwen_image_edit/config.yaml @@ -0,0 +1,15 @@ +pipeline_class: QwenImageEditPipeline +module: diffusers.pipelines.qwenimage.pipeline_qwenimage_edit + +params_grid: + - name: "square_50steps_single" + width: 1024 + height: 1024 + num_inference_steps: 50 + true_cfg_scale: 4.0 + parallel: "single" + image: ".github/ci_runners/test_data/sample_input.png" + +prompt: "Transform this image into a watercolor painting style." 
+negative_prompt: "blurry, low quality, distorted." +reference_config: "square_50steps_single" diff --git a/.github/ci_runners/configs/qwen_image_edit/variants/2511.yaml b/.github/ci_runners/configs/qwen_image_edit/variants/2511.yaml new file mode 100644 index 000000000000..982c5c5316ea --- /dev/null +++ b/.github/ci_runners/configs/qwen_image_edit/variants/2511.yaml @@ -0,0 +1,4 @@ +model_id: Qwen/Qwen-Image-Edit-2511 +weight_path: /home/weights/Qwen-Image-Edit-2511 +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/qwen_image_layered/config.yaml b/.github/ci_runners/configs/qwen_image_layered/config.yaml new file mode 100644 index 000000000000..2f94f5a03650 --- /dev/null +++ b/.github/ci_runners/configs/qwen_image_layered/config.yaml @@ -0,0 +1,16 @@ +pipeline_class: QwenImageLayeredPipeline +module: diffusers.pipelines.qwenimage.pipeline_qwenimage_layered + +params_grid: + - name: "default_50steps_single" + width: 1024 + height: 1024 + num_inference_steps: 50 + true_cfg_scale: 4.0 + layers: 4 + resolution: 640 + parallel: "single" + +prompt: "A cute cat sitting on a wooden table." +negative_prompt: "blurry, low quality, distorted." 
+reference_config: "default_50steps_single" diff --git a/.github/ci_runners/configs/qwen_image_layered/variants/default.yaml b/.github/ci_runners/configs/qwen_image_layered/variants/default.yaml new file mode 100644 index 000000000000..a307307d648a --- /dev/null +++ b/.github/ci_runners/configs/qwen_image_layered/variants/default.yaml @@ -0,0 +1,4 @@ +model_id: Qwen/Qwen-Image-Layered +weight_path: /home/weights/Qwen-Image-Layered +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/wan_i2v/config.yaml b/.github/ci_runners/configs/wan_i2v/config.yaml new file mode 100644 index 000000000000..6cdc4e2136c7 --- /dev/null +++ b/.github/ci_runners/configs/wan_i2v/config.yaml @@ -0,0 +1,16 @@ +pipeline_class: WanImageToVideoPipeline +module: diffusers.pipelines.wan.pipeline_wan_i2v + +params_grid: + - name: "small_30steps_single" + width: 480 + height: 272 + num_inference_steps: 30 + guidance_scale: 5.0 + num_frames: 33 + parallel: "single" + image: ".github/ci_runners/test_data/sample_input.png" + +prompt: "A cat walking on a beach at sunset, cinematic quality." +negative_prompt: "blurry, low quality, distorted." 
+reference_config: "small_30steps_single" diff --git a/.github/ci_runners/configs/wan_i2v/variants/wan2.1_i2v_14b_480p.yaml b/.github/ci_runners/configs/wan_i2v/variants/wan2.1_i2v_14b_480p.yaml new file mode 100644 index 000000000000..b58002836a31 --- /dev/null +++ b/.github/ci_runners/configs/wan_i2v/variants/wan2.1_i2v_14b_480p.yaml @@ -0,0 +1,4 @@ +model_id: Wan-AI/Wan2.1-I2V-14B-480P-Diffusers +weight_path: /home/weights/Wan2.1-I2V-14B-480P-Diffusers +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/wan_i2v/variants/wan2.2_i2v_a14b.yaml b/.github/ci_runners/configs/wan_i2v/variants/wan2.2_i2v_a14b.yaml new file mode 100644 index 000000000000..f351a5132f3b --- /dev/null +++ b/.github/ci_runners/configs/wan_i2v/variants/wan2.2_i2v_a14b.yaml @@ -0,0 +1,6 @@ +model_id: Wan-AI/Wan2.2-I2V-A14B-Diffusers +weight_path: /home/weights/Wan2.2-I2V-A14B-Diffusers +backend: modelscope +extra_params: + boundary_ratio: 0.875 + guidance_scale_2: 5.0 diff --git a/.github/ci_runners/configs/wan_pipeline/config.yaml b/.github/ci_runners/configs/wan_pipeline/config.yaml new file mode 100644 index 000000000000..fe64e6a87699 --- /dev/null +++ b/.github/ci_runners/configs/wan_pipeline/config.yaml @@ -0,0 +1,15 @@ +pipeline_class: WanPipeline +module: diffusers.pipelines.wan.pipeline_wan + +params_grid: + - name: "small_30steps_single" + width: 640 + height: 352 + num_inference_steps: 30 + guidance_scale: 5.0 + num_frames: 49 + parallel: "single" + +prompt: "A cat walking on a beach at sunset, cinematic quality." +negative_prompt: "blurry, low quality, distorted." 
+reference_config: "small_30steps_single" diff --git a/.github/ci_runners/configs/wan_pipeline/variants/wan2.1_t2v_14b.yaml b/.github/ci_runners/configs/wan_pipeline/variants/wan2.1_t2v_14b.yaml new file mode 100644 index 000000000000..60bf33ad1319 --- /dev/null +++ b/.github/ci_runners/configs/wan_pipeline/variants/wan2.1_t2v_14b.yaml @@ -0,0 +1,4 @@ +model_id: Wan-AI/Wan2.1-T2V-14B-Diffusers +weight_path: /home/weights/Wan2.1-T2V-14B-Diffusers +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/wan_pipeline/variants/wan2.2_t2v_a14b.yaml b/.github/ci_runners/configs/wan_pipeline/variants/wan2.2_t2v_a14b.yaml new file mode 100644 index 000000000000..a1f2afa19330 --- /dev/null +++ b/.github/ci_runners/configs/wan_pipeline/variants/wan2.2_t2v_a14b.yaml @@ -0,0 +1,7 @@ +model_id: Wan-AI/Wan2.2-T2V-A14B-Diffusers +weight_path: /home/weights/Wan2.2-T2V-A14B-Diffusers +backend: modelscope +extra_params: + boundary_ratio: 0.875 + guidance_scale_2: 5.0 + expand_timesteps: false diff --git a/.github/ci_runners/configs/wan_pipeline/variants/wan2.2_ti2v_5b.yaml b/.github/ci_runners/configs/wan_pipeline/variants/wan2.2_ti2v_5b.yaml new file mode 100644 index 000000000000..6ed3bb11e840 --- /dev/null +++ b/.github/ci_runners/configs/wan_pipeline/variants/wan2.2_ti2v_5b.yaml @@ -0,0 +1,4 @@ +model_id: Wan-AI/Wan2.2-TI2V-5B-Diffusers +weight_path: /home/weights/Wan2.2-TI2V-5B-Diffusers +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/wan_vace/config.yaml b/.github/ci_runners/configs/wan_vace/config.yaml new file mode 100644 index 000000000000..87a3f7bda6dc --- /dev/null +++ b/.github/ci_runners/configs/wan_vace/config.yaml @@ -0,0 +1,15 @@ +pipeline_class: WanVACEPipeline +module: diffusers.pipelines.wan.pipeline_wan_vace + +params_grid: + - name: "small_30steps_single" + width: 480 + height: 272 + num_inference_steps: 30 + guidance_scale: 5.0 + num_frames: 33 + parallel: "single" + +prompt: "A cat walking on a beach at 
sunset, cinematic quality." +negative_prompt: "blurry, low quality, distorted." +reference_config: "small_30steps_single" diff --git a/.github/ci_runners/configs/wan_vace/variants/flf2v_14b_720p.yaml b/.github/ci_runners/configs/wan_vace/variants/flf2v_14b_720p.yaml new file mode 100644 index 000000000000..1ed66107ff53 --- /dev/null +++ b/.github/ci_runners/configs/wan_vace/variants/flf2v_14b_720p.yaml @@ -0,0 +1,4 @@ +model_id: Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers +weight_path: /home/weights/Wan2.1-FLF2V-14B-720P-diffusers +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/wan_vace/variants/vace_14b.yaml b/.github/ci_runners/configs/wan_vace/variants/vace_14b.yaml new file mode 100644 index 000000000000..a5eb62a9b86f --- /dev/null +++ b/.github/ci_runners/configs/wan_vace/variants/vace_14b.yaml @@ -0,0 +1,4 @@ +model_id: Wan-AI/Wan2.1-VACE-14B-diffusers +weight_path: /home/weights/Wan2.1-VACE-14B-diffusers +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/configs/z_image/config.yaml b/.github/ci_runners/configs/z_image/config.yaml new file mode 100644 index 000000000000..fb8a6fadae7a --- /dev/null +++ b/.github/ci_runners/configs/z_image/config.yaml @@ -0,0 +1,16 @@ +pipeline_class: ZImagePipeline +module: diffusers.pipelines.z_image.pipeline_z_image + +params_grid: + - name: "landscape_50steps_single" + width: 1024 + height: 1024 + num_inference_steps: 50 + guidance_scale: 5.0 + cfg_normalization: false + cfg_truncation: 1.0 + parallel: "single" + +prompt: "A serene mountain lake at sunrise, hyperrealistic, 8k quality." 
+negative_prompt: "" +reference_config: "landscape_50steps_single" diff --git a/.github/ci_runners/configs/z_image/variants/default.yaml b/.github/ci_runners/configs/z_image/variants/default.yaml new file mode 100644 index 000000000000..cdbb452a6930 --- /dev/null +++ b/.github/ci_runners/configs/z_image/variants/default.yaml @@ -0,0 +1,4 @@ +model_id: Tongyi-MAI/Z-Image +weight_path: /home/weights/Z-Image +backend: modelscope +extra_params: {} diff --git a/.github/ci_runners/issue_bot.py b/.github/ci_runners/issue_bot.py new file mode 100644 index 000000000000..f685c16e3682 --- /dev/null +++ b/.github/ci_runners/issue_bot.py @@ -0,0 +1,131 @@ +import argparse +import json +import os +import re +import urllib.request +import urllib.error + + +def find_existing_issues(repo: str, token: str, title_prefix: str) -> list[dict]: + url = f"https://api.github.com/repos/{repo}/issues?state=open&labels=ci-failure" + req = urllib.request.Request(url) + req.add_header("Authorization", f"Bearer {token}") + req.add_header("Accept", "application/vnd.github+json") + req.add_header("User-Agent", "diffusers-ci-bot") + + try: + with urllib.request.urlopen(req) as resp: + issues = json.loads(resp.read()) + return [i for i in issues if title_prefix in i.get("title", "")] + except Exception: + return [] + + +def create_issue(repo: str, token: str, title: str, body: str, label: str = "ci-failure") -> str: + url = f"https://api.github.com/repos/{repo}/issues" + data = json.dumps({"title": title, "body": body, "labels": [label]}).encode("utf-8") + + req = urllib.request.Request(url, data=data, method="POST") + req.add_header("Authorization", f"Bearer {token}") + req.add_header("Accept", "application/vnd.github+json") + req.add_header("User-Agent", "diffusers-ci-bot") + req.add_header("Content-Type", "application/json") + + with urllib.request.urlopen(req) as resp: + result = json.loads(resp.read()) + return result.get("html_url", "") + + +def classify_error(error_text: str) -> str: + if 
"out of memory" in error_text.lower() or "OOM" in error_text: + return ("**错误类型**: OOM (显存不足)\n\n" + "**建议**: 减小 `num_frames`、`width`/`height`,或启用 `enable_attention_slicing`") + if "connection" in error_text.lower() or "timeout" in error_text.lower(): + return ("**错误类型**: 网络/连接异常\n\n" + "**建议**: 检查 HuggingFace/ModelScope 网络连通性,或切换到本地权重") + if "key" in error_text.lower() or "AttributeError" in error_text: + return ("**错误类型**: API 不兼容\n\n" + "**建议**: 检查 diffusers 版本与模型是否匹配,检查参数名是否正确(true_cfg_scale vs guidance_scale)") + if "import" in error_text.lower() or "ModuleNotFoundError" in error_text.lower(): + return ("**错误类型**: 依赖缺失\n\n" + "**建议**: 检查 requirements,安装缺失的依赖包") + return ("**错误类型**: 未知\n\n" + "**建议**: 请查看上方错误日志进行人工排查") + + +def build_issue_title(pipeline: str, variant: str, date_str: str) -> str: + return f"[CI] {pipeline} / {variant} 运行失败 ({date_str})" + + +def build_issue_body(r: dict, run_url: str) -> str: + error_text = r.get("error", "无错误信息") + lines = [ + "## 失败信息", + f"- **Pipeline**: {r['pipeline']}", + f"- **Variant**: {r['variant']}", + f"- **配置**: {r['config_name']}", + f"- **设备**: {r['device']} / {r['dtype']}", + f"- **并行策略**: {r['parallel']}", + "", + "## 修复建议", + classify_error(error_text), + "", + "## 错误日志", + "```", + error_text[:5000], + "```", + "", + f"## 完整日志", + f"[Actions Run]({run_url})" if run_url else run_url, + ] + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--report", required=True, help="output directory containing all_results.json") + args = parser.parse_args() + + repo = os.environ.get("GITHUB_REPOSITORY", "luren55/diffusers") + token = os.environ.get("GITHUB_TOKEN", "") + run_url = "" + server_url = os.environ.get("GITHUB_SERVER_URL", "https://github.com") + run_id = os.environ.get("GITHUB_RUN_ID", "") + if server_url and repo and run_id: + run_url = f"{server_url}/{repo}/actions/runs/{run_id}" + + results_path = os.path.join(args.report, "all_results.json") + if not 
os.path.isfile(results_path): + print(f"ERROR: {results_path} not found") + return + + with open(results_path, "r") as f: + results = json.load(f) + + failed = [r for r in results if r["result"] and r["result"].get("status") == "failed"] + + if not failed: + print("No failures, no issues to create.") + return + + date_str = failed[0]["timestamp"][:10] + + for r in failed: + title = build_issue_title(r["pipeline"], r["variant"], date_str) + title_prefix = f"[CI] {r['pipeline']} / {r['variant']}" + + existing = find_existing_issues(repo, token, title_prefix) + if existing: + print(f"[SKIP] Issue already exists for {r['pipeline']} / {r['variant']}: {existing[0].get('html_url')}") + continue + + body = build_issue_body(r, run_url) + try: + url = create_issue(repo, token, title, body) + print(f"[OK] Created issue: {url}") + except Exception as e: + print(f"[FAIL] Could not create issue: {e}") + + +if __name__ == "__main__": + main() diff --git a/.github/ci_runners/reference/.gitkeep b/.github/ci_runners/reference/.gitkeep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/.github/ci_runners/report.py b/.github/ci_runners/report.py new file mode 100644 index 000000000000..7f2070032f8a --- /dev/null +++ b/.github/ci_runners/report.py @@ -0,0 +1,68 @@ +import argparse +import json +import os +from datetime import datetime, timezone + + +def generate_report(results_path: str) -> str: + with open(results_path, "r") as f: + results = json.load(f) + + passed = [r for r in results if r["result"] and r["result"].get("status") == "passed"] + failed = [r for r in results if r["result"] and r["result"].get("status") == "failed"] + + lines = [] + lines.append("# Diffusers Model CI Report") + lines.append(f"**Generated**: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}") + lines.append(f"**Total**: {len(results)} | **Passed**: {len(passed)} | **Failed**: {len(failed)}") + lines.append("") + + if passed: + lines.append("## Passed") + lines.append("| 
"""Render a Markdown CI report from all_results.json."""

import argparse
import json
import os
from datetime import datetime, timezone


def generate_report(results_path: str) -> str:
    """Read *results_path* (all_results.json) and return a Markdown report.

    Passed runs are summarised in a table (time, PSNR, SSIM); failed runs each
    get a section with a truncated error log.
    """
    with open(results_path, "r") as fh:
        entries = json.load(fh)

    def _by_status(status: str) -> list[dict]:
        return [e for e in entries if e["result"] and e["result"].get("status") == status]

    ok = _by_status("passed")
    bad = _by_status("failed")

    out = [
        "# Diffusers Model CI Report",
        f"**Generated**: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
        f"**Total**: {len(entries)} | **Passed**: {len(ok)} | **Failed**: {len(bad)}",
        "",
    ]

    if ok:
        out.append("## Passed")
        out.append("| Pipeline | Variant | Config | Parallel | Time (s) | PSNR | SSIM |")
        out.append("|---|---|---|---|---|---|---|")
        for e in ok:
            prec = e["result"].get("precision", {})
            if not isinstance(prec, dict):
                prec = {}  # non-dict precision renders as "-" columns
            out.append(
                f"| {e['pipeline']} | {e['variant']} | {e['config_name']} | "
                f"{e['parallel']} | {e['result'].get('inference_time_s', '-')} | "
                f"{prec.get('psnr', '-')} | {prec.get('ssim', '-')} |"
            )
        out.append("")

    if bad:
        out.append("## Failed")
        for e in bad:
            out.extend([
                f"### {e['pipeline']} / {e['variant']} / {e['config_name']}",
                f"- **Parallel**: {e['parallel']}",
                f"- **Device**: {e['device']} / {e['dtype']}",
                "```",
                e.get("error", "")[:2000],  # keep the report readable
                "```",
                "",
            ])
    else:
        out.append("## All tests passed!")

    return "\n".join(out)


def main():
    """CLI entry point: read --output/all_results.json, write report.md there."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--output", required=True, help="output directory containing all_results.json")
    opts = cli.parse_args()

    src = os.path.join(opts.output, "all_results.json")
    rendered = generate_report(src)

    dest = os.path.join(opts.output, "report.md")
    with open(dest, "w") as fh:
        fh.write(rendered)

    print(rendered)
    print(f"\nReport written to {dest}")


if __name__ == "__main__":
    main()
detect_device, timer, validate_image, compare_with_reference + + +def load_yaml(path: str) -> dict: + with open(path, "r") as f: + return yaml.safe_load(f) + + +def save_image(image, output_dir: str, filename: str) -> str: + os.makedirs(os.path.join(output_dir, "images"), exist_ok=True) + filepath = os.path.join(output_dir, "images", filename) + if hasattr(image, "save"): + image.save(filepath) + elif isinstance(image, list) and len(image) > 0 and hasattr(image[0], "save"): + image = image[0] + image.save(filepath) + else: + from PIL import Image + Image.fromarray(image).save(filepath) + return filepath + + +def load_pipeline(pipeline_class_name: str, module_path: str, weight_path: str, model_id: str, backend: str, device: str, torch_dtype: torch.dtype): + module = importlib.import_module(module_path) + pipeline_cls = getattr(module, pipeline_class_name) + + if backend == "local" and weight_path and os.path.isdir(weight_path): + pipe = pipeline_cls.from_pretrained( + weight_path, + torch_dtype=torch_dtype, + trust_remote_code=True, + ) + elif backend == "modelscope": + from modelscope import snapshot_download + local_path = snapshot_download(model_id or weight_path) + pipe = pipeline_cls.from_pretrained( + local_path, + torch_dtype=torch_dtype, + trust_remote_code=True, + ) + elif backend == "hf": + pipe = pipeline_cls.from_pretrained( + model_id, + torch_dtype=torch_dtype, + trust_remote_code=True, + ) + else: + pipe = pipeline_cls.from_pretrained( + weight_path, + torch_dtype=torch_dtype, + trust_remote_code=True, + ) + pipe = pipe.to(device) + return pipe + + +def apply_optimizations(pipe, parallel: str): + if parallel == "single": + if hasattr(pipe, "enable_attention_slicing"): + try: + pipe.enable_attention_slicing() + except Exception: + pass + if hasattr(pipe, "enable_vae_slicing"): + try: + pipe.enable_vae_slicing() + except Exception: + pass + + +def build_kwargs(params_grid_entry: dict, prompt: str, negative_prompt: str, device: str, extra_params: dict | 
None = None) -> dict: + grid_copy = dict(params_grid_entry) + parallel = grid_copy.pop("parallel", "single") + name = grid_copy.pop("name", "unnamed") + + kwargs = {k: v for k, v in grid_copy.items()} + kwargs["prompt"] = prompt + if negative_prompt: + kwargs["negative_prompt"] = negative_prompt + + if extra_params: + kwargs.update(extra_params) + + if "image" in kwargs and isinstance(kwargs["image"], str): + from PIL import Image + image_path = kwargs["image"] + if os.path.isfile(image_path): + kwargs["image"] = Image.open(image_path).convert("RGB") + else: + del kwargs["image"] + + gen_device = "cuda" if device == "cuda" else "cpu" + kwargs["generator"] = torch.Generator(device=gen_device).manual_seed(42) + + return kwargs, parallel, name + + +def run_single_config(pipe, kwargs: dict, parallel: str, name: str, + pipeline: str, variant_name: str, device: str, dtype_str: str, + output_dir: str, ref_image_path: str | None = None) -> dict: + result = { + "pipeline": pipeline, + "variant": variant_name, + "config_name": name, + "parallel": parallel, + "device": device, + "dtype": dtype_str, + "params": {k: v for k, v in kwargs.items() + if k not in ("prompt", "negative_prompt", "generator", "image")}, + "result": None, + "error": None, + "timestamp": datetime.now(timezone.utc).isoformat(), + } + + try: + with timer() as get_elapsed: + output = pipe(**kwargs) + + inference_time = round(get_elapsed(), 2) + image = output.images[0] + + filename = f"{pipeline}_{variant_name}_{name}.png" + out_path = save_image(image, output_dir, filename) + + width = kwargs.get("width", 0) + height = kwargs.get("height", 0) + validation = validate_image(image, width, height) + + precision = {} + if ref_image_path and os.path.isfile(ref_image_path): + precision = compare_with_reference(image, ref_image_path) + elif ref_image_path: + precision = {"error": f"reference not found: {ref_image_path}"} + + result["result"] = { + "status": "passed", + "inference_time_s": inference_time, + 
"output_file": os.path.relpath(out_path, output_dir), + "validation": validation, + "precision": precision, + } + except Exception as e: + result["result"] = {"status": "failed"} + result["error"] = "".join(traceback.format_exception(type(e), e, e.__traceback__)) + + return result + + +def generate_reference_only(config_dir: str, output_dir: str): + config = load_yaml(os.path.join(config_dir, "config.yaml")) + ref_config_name = config.get("reference_config") + if not ref_config_name: + print(f"[SKIP] {config_dir}: no reference_config defined") + return + + ref_entry = None + for entry in config["params_grid"]: + if entry.get("name") == ref_config_name: + ref_entry = entry + break + if not ref_entry: + print(f"[SKIP] {config_dir}: reference_config '{ref_config_name}' not found in params_grid") + return + + pipeline_name = os.path.basename(config_dir) + pipeline_class = config["pipeline_class"] + module_path = config["module"] + prompt = config.get("prompt", "") + + variants_dir = os.path.join(config_dir, "variants") + variant_files = sorted(Path(variants_dir).glob("*.yaml")) + + pipeline_label = os.path.basename(config_dir).replace("/", "_") + + if not variant_files: + print(f"[SKIP] {pipeline_name}: no variant files") + return + + variant_data = load_yaml(str(variant_files[0])) + device, torch_dtype = detect_device() + + print(f"[{pipeline_name}] loading model ({variant_files[0].stem})...") + pipe = load_pipeline( + pipeline_class, module_path, + variant_data.get("weight_path", variant_data["model_id"]), + variant_data["model_id"], + variant_data.get("backend", "local"), + device, torch_dtype, + ) + apply_optimizations(pipe, ref_entry.get("parallel", "single")) + + kwargs, _, _ = build_kwargs(ref_entry, prompt, config.get("negative_prompt", ""), device, + variant_data.get("extra_params")) + output = pipe(**kwargs) + image = output.images[0] + + ci_runners_dir = os.path.dirname(os.path.dirname(config_dir)) + ref_dir = os.path.join(ci_runners_dir, "reference") + 
os.makedirs(ref_dir, exist_ok=True) + ref_path = os.path.join(ref_dir, f"{pipeline_label}_{ref_config_name}.png") + image.save(ref_path) + print(f"[{pipeline_name}] reference saved to {ref_path}") + + del pipe + gc.collect() + if device == "cuda": + torch.cuda.empty_cache() + elif device == "npu": + torch.npu.empty_cache() + + +def scan_pipelines(configs_root: str) -> list[str]: + pipeline_dirs = [] + for entry in sorted(os.listdir(configs_root)): + full_path = os.path.join(configs_root, entry) + if os.path.isdir(full_path) and os.path.isfile(os.path.join(full_path, "config.yaml")): + pipeline_dirs.append(full_path) + return pipeline_dirs + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--configs", required=True, help="path to configs/ directory") + parser.add_argument("--output", required=True, help="output directory for results") + parser.add_argument("--generate-reference", action="store_true", + help="generate reference images instead of running full CI") + args = parser.parse_args() + + os.makedirs(args.output, exist_ok=True) + + if args.generate_reference: + for pipe_dir in scan_pipelines(args.configs): + generate_reference_only(pipe_dir, args.output) + return + + all_results = [] + device, torch_dtype = detect_device() + dtype_str = str(torch_dtype).split(".")[-1] + + for pipe_dir in scan_pipelines(args.configs): + config = load_yaml(os.path.join(pipe_dir, "config.yaml")) + pipeline_class = config["pipeline_class"] + module_path = config["module"] + prompt = config.get("prompt", "") + negative_prompt = config.get("negative_prompt", "") + ref_config_name = config.get("reference_config") + pipeline_label = os.path.basename(pipe_dir).replace("/", "_") + + variants_dir = os.path.join(pipe_dir, "variants") + variant_files = sorted(Path(variants_dir).glob("*.yaml")) + + if not variant_files: + print(f"[WARN] {pipeline_label}: no variant files found, skipping") + continue + + for vf in variant_files: + variant_data = load_yaml(str(vf)) + 
variant_name = vf.stem + + print(f"[{pipeline_class}] loading model: {variant_name}") + try: + pipe = load_pipeline( + pipeline_class, module_path, + variant_data.get("weight_path", variant_data["model_id"]), + variant_data["model_id"], + variant_data.get("backend", "local"), + device, torch_dtype, + ) + except Exception as e: + result = { + "pipeline": pipeline_class, + "variant": variant_name, + "config_name": "model_load", + "parallel": "N/A", + "device": device, + "dtype": dtype_str, + "params": {}, + "result": {"status": "failed"}, + "error": "".join(traceback.format_exception(type(e), e, e.__traceback__)), + "timestamp": datetime.now(timezone.utc).isoformat(), + } + all_results.append(result) + continue + + for params_entry in config["params_grid"]: + kwargs, parallel, config_name = build_kwargs( + params_entry, prompt, negative_prompt, device, + variant_data.get("extra_params"), + ) + apply_optimizations(pipe, parallel) + + ref_image_path = None + if config_name == ref_config_name: + ci_runners_dir = os.path.dirname(os.path.dirname(pipe_dir)) + ref_image_path = os.path.join( + ci_runners_dir, "reference", + f"{pipeline_label}_{ref_config_name}.png" + ) + + print(f" [{config_name}] parallel={parallel} ...") + result = run_single_config( + pipe, kwargs, parallel, config_name, + pipeline_class, variant_name, device, dtype_str, + args.output, ref_image_path, + ) + all_results.append(result) + + status = result["result"].get("status", "unknown") + if status == "passed": + t = result["result"].get("inference_time_s", "?") + print(f" PASS ({t}s)") + else: + print(f" FAIL") + + del pipe + if device == "cuda": + torch.cuda.empty_cache() + elif device == "npu": + torch.npu.empty_cache() + + results_path = os.path.join(args.output, "all_results.json") + with open(results_path, "w") as f: + json.dump(all_results, f, indent=2, ensure_ascii=False) + + passed = sum(1 for r in all_results if r["result"] and r["result"].get("status") == "passed") + failed = sum(1 for r in 
all_results if r["result"] and r["result"].get("status") == "failed") + print(f"\nDone. {len(all_results)} runs: {passed} passed, {failed} failed.") + print(f"Results written to {results_path}") + + if failed > 0: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/.github/ci_runners/runner_utils.py b/.github/ci_runners/runner_utils.py new file mode 100644 index 000000000000..01dab54fcb52 --- /dev/null +++ b/.github/ci_runners/runner_utils.py @@ -0,0 +1,70 @@ +import json +import time +from contextlib import contextmanager + +import torch +from PIL import Image +import numpy as np + +try: + from skimage.metrics import structural_similarity as ssim + HAS_SKIMAGE = True +except ImportError: + HAS_SKIMAGE = False + + +def detect_device() -> tuple[str, torch.dtype]: + try: + import torch_npu # noqa: F401 + if hasattr(torch, "npu") and torch.npu.is_available(): + return "npu", torch.bfloat16 + except Exception: + pass + if torch.cuda.is_available(): + return "cuda", torch.bfloat16 + return "cpu", torch.float32 + + +@contextmanager +def timer(): + start = time.perf_counter() + elapsed = 0.0 + try: + yield lambda: elapsed + finally: + elapsed = time.perf_counter() - start + + +def validate_image(image: Image.Image, expected_width: int, expected_height: int) -> dict: + w, h = image.size + dimensions_ok = (w == expected_width and h == expected_height) + + arr = np.array(image.convert("RGB"), dtype=np.float32) + non_black = bool(arr.max() > 5.0) + + return {"dimensions_ok": dimensions_ok, "non_black": non_black} + + +def compare_with_reference(image: Image.Image, ref_path: str) -> dict: + if not HAS_SKIMAGE: + return {"error": "skimage not installed, cannot compute PSNR/SSIM"} + try: + ref = Image.open(ref_path).convert("RGB") + except FileNotFoundError: + return {"error": f"reference image not found: {ref_path}"} + + img_arr = np.array(image, dtype=np.float64) + ref_arr = np.array(ref, dtype=np.float64) + + if img_arr.shape != ref_arr.shape: + return 
{"error": f"shape mismatch: {img_arr.shape} vs {ref_arr.shape}"} + + mse = np.mean((img_arr - ref_arr) ** 2) + if mse == 0: + psnr = float("inf") + else: + psnr = 20 * np.log10(255.0 / np.sqrt(mse)) + + ssim_val = ssim(ref_arr, img_arr, channel_axis=-1, data_range=255) + + return {"psnr": round(psnr, 2), "ssim": round(ssim_val, 4)} diff --git a/.github/ci_runners/test_data/sample_input.png b/.github/ci_runners/test_data/sample_input.png new file mode 100644 index 000000000000..6bb1cb5e0c34 Binary files /dev/null and b/.github/ci_runners/test_data/sample_input.png differ diff --git a/.github/workflows/model_ci.yml b/.github/workflows/model_ci.yml new file mode 100644 index 000000000000..b4c3319653ba --- /dev/null +++ b/.github/workflows/model_ci.yml @@ -0,0 +1,43 @@ +name: Model Pipeline CI + +on: + schedule: + - cron: "0 0 1,15 * *" + workflow_dispatch: + +env: + DIFFUSERS_IS_CI: yes + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + +jobs: + run_pipelines: + runs-on: [self-hosted, diffusers_ci] + steps: + - name: Run all pipelines + run: | + cd /home/m30063890/diffusers + git pull origin main + python .github/ci_runners/run_pipeline.py \ + --configs .github/ci_runners/configs \ + --output /tmp/artifacts/run-${GITHUB_RUN_ID} + + - name: Generate report + if: always() + run: | + cd /home/m30063890/diffusers + python .github/ci_runners/report.py --output /tmp/artifacts/run-${GITHUB_RUN_ID} + + - name: Create issues for failures + if: failure() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + cd /home/m30063890/diffusers + python .github/ci_runners/issue_bot.py --report /tmp/artifacts/run-${GITHUB_RUN_ID} + + - name: Upload artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: ci-run-${GITHUB_RUN_ID} + path: /tmp/artifacts/run-${GITHUB_RUN_ID}/ diff --git a/.gitignore b/.gitignore index 5d19ea2db3c9..1bc8b89a82bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,187 +1,8 @@ -# Initially taken from GitHub's Python gitignore file - -# 
Byte-compiled / optimized / DLL files +# CI runner artifacts +_actions/ +_PipelineMapping/ +_update/ +diffusers/ +fusion_result.json +src/diffusers.egg-info/ __pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# tests and logs -tests/fixtures/cached_*_text.txt -logs/ -lightning_logs/ -lang_code_data/ - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a Python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# vscode -.vs -.vscode - -# Cursor -.cursor - -# Pycharm -.idea - -# TF code -tensorflow_code - -# Models -proc_data - -# examples -runs -/runs_old -/wandb -/examples/runs -/examples/**/*.args -/examples/rag/sweep - -# data -/data -serialization_dir - -# emacs -*.*~ -debug.env - -# vim -.*.swp - -# ctags -tags - -# pre-commit -.pre-commit* - -# .lock -*.lock - -# 
DS_Store (MacOS) -.DS_Store - -# RL pipelines may produce mp4 outputs -*.mp4 - -# dependencies -/transformers - -# ruff -.ruff_cache - -# wandb -wandb - -# AI agent generated symlinks -/AGENTS.md -/CLAUDE.md -/.agents/skills -/.claude/skills \ No newline at end of file diff --git a/src/diffusers/models/transformers/transformer_z_image.py b/src/diffusers/models/transformers/transformer_z_image.py index ba401e7fdef1..7b3d016e6e58 100644 --- a/src/diffusers/models/transformers/transformer_z_image.py +++ b/src/diffusers/models/transformers/transformer_z_image.py @@ -24,6 +24,7 @@ from ...models.attention_processor import Attention from ...models.modeling_utils import ModelMixin from ...models.normalization import RMSNorm +from ...utils import is_torch_npu_available from ...utils.torch_utils import maybe_allow_in_graph from ..attention_dispatch import dispatch_attention_fn from ..modeling_outputs import Transformer2DModelOutput @@ -322,37 +323,72 @@ def __init__( self.axes_lens = axes_lens assert len(axes_dims) == len(axes_lens), "axes_dims and axes_lens must have the same length" self.freqs_cis = None + self.freqs_real = None + self.freqs_imag = None @staticmethod def precompute_freqs_cis(dim: list[int], end: list[int], theta: float = 256.0): with torch.device("cpu"): - freqs_cis = [] - for i, (d, e) in enumerate(zip(dim, end)): - freqs = 1.0 / (theta ** (torch.arange(0, d, 2, dtype=torch.float64, device="cpu") / d)) - timestep = torch.arange(e, device=freqs.device, dtype=torch.float64) - freqs = torch.outer(timestep, freqs).float() - freqs_cis_i = torch.polar(torch.ones_like(freqs), freqs).to(torch.complex64) # complex64 - freqs_cis.append(freqs_cis_i) - - return freqs_cis + if is_torch_npu_available: + freqs_real_list = [] + freqs_imag_list = [] + for i, (d, e) in enumerate(zip(dim, end)): + freqs = 1.0 / (theta ** (torch.arange(0, d, 2, dtype=torch.float64, device="cpu") / d)) + timestep = torch.arange(e, device=freqs.device, dtype=torch.float64) + freqs = 
torch.outer(timestep, freqs).float() + freqs_real = torch.cos(freqs) + freqs_imag = torch.sin(freqs) + freqs_real_list.append(freqs_real.to(torch.float32)) + freqs_imag_list.append(freqs_imag.to(torch.float32)) + + return freqs_real_list, freqs_imag_list + else: + freqs_cis = [] + for i, (d, e) in enumerate(zip(dim, end)): + freqs = 1.0 / (theta ** (torch.arange(0, d, 2, dtype=torch.float64, device="cpu") / d)) + timestep = torch.arange(e, device=freqs.device, dtype=torch.float64) + freqs = torch.outer(timestep, freqs).float() + freqs_cis_i = torch.polar(torch.ones_like(freqs), freqs).to(torch.complex64) # complex64 + freqs_cis.append(freqs_cis_i) + return freqs_cis def __call__(self, ids: torch.Tensor): assert ids.ndim == 2 assert ids.shape[-1] == len(self.axes_dims) device = ids.device - if self.freqs_cis is None: - self.freqs_cis = self.precompute_freqs_cis(self.axes_dims, self.axes_lens, theta=self.theta) - self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis] + if is_torch_npu_available: + if self.freqs_real is None or self.freqs_imag is None: + freqs_real, freqs_imag = self.precompute_freqs_cis(self.axes_dims, self.axes_lens, theta=self.theta) + self.freqs_real = [fr.to(device) for fr in freqs_real] + self.freqs_imag = [fi.to(device) for fi in freqs_imag] + else: + # Ensure freqs_cis are on the same device as ids + if self.freqs_real[0].device != device: + self.freqs_real = [fr.to(device) for fr in freqs_real] + self.freqs_imag = [fi.to(device) for fi in freqs_imag] + + result = [] + for i in range(len(self.axes_dims)): + index = ids[:, i] + real_part = self.freqs_real[i][index] + imag_part = self.freqs_imag[i][index] + complex_part = torch.complex(real_part, imag_part) + result.append(complex_part) else: - # Ensure freqs_cis are on the same device as ids - if self.freqs_cis[0].device != device: + if self.freqs_cis is None: + self.freqs_cis = self.precompute_freqs_cis(self.axes_dims, self.axes_lens, theta=self.theta) self.freqs_cis = 
[freqs_cis.to(device) for freqs_cis in self.freqs_cis] + else: + # Ensure freqs_cis are on the same device as ids + if self.freqs_cis[0].device != device: + self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis] + + result = [] + for i in range(len(self.axes_dims)): + index = ids[:, i] + result.append(self.freqs_cis[i][index]) - result = [] - for i in range(len(self.axes_dims)): - index = ids[:, i] - result.append(self.freqs_cis[i][index]) return torch.cat(result, dim=-1)