Skip to content

Commit ba47dd4

Browse files
committed
Upsampler pipeline
1 parent a0f171d commit ba47dd4

File tree

7 files changed

+836
-25
lines changed

7 files changed

+836
-25
lines changed

src/maxdiffusion/checkpointing/ltx2_checkpointer.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,15 @@
1919
import numpy as np
2020
from typing import Optional, Tuple
2121
from maxdiffusion.pipelines.ltx2.ltx2_pipeline import LTX2Pipeline
22-
from maxdiffusion import max_logging
22+
from maxdiffusion.models.ltx2.transformer_ltx2 import LTX2VideoTransformer3DModel
23+
from maxdiffusion.models.ltx2.autoencoder_kl_ltx2 import LTX2VideoAutoencoderKL
24+
from maxdiffusion.models.ltx2.autoencoder_kl_ltx2_audio import FlaxAutoencoderKLLTX2Audio
25+
from maxdiffusion.models.ltx2.text_encoders.text_encoders_ltx2 import LTX2AudioVideoGemmaTextEncoder
26+
from maxdiffusion.models.ltx2.vocoder_ltx2 import LTX2Vocoder
27+
from maxdiffusion.schedulers.scheduling_flow_match_flax import FlaxFlowMatchScheduler
28+
from maxdiffusion.models.ltx2.ltx2_utils import load_upsampler_weights
29+
from transformers import AutoTokenizer, Gemma3ForConditionalGeneration
30+
from maxdiffusion import max_logging, max_utils
2331
from maxdiffusion.checkpointing.checkpointing_utils import create_orbax_checkpoint_manager
2432
import orbax.checkpoint as ocp
2533
from etils import epath
@@ -95,6 +103,23 @@ def load_checkpoint(
95103

96104
return pipeline, opt_state, step
97105

106+
def load_upsampler(self, upsampler_model_path: str, eval_shapes: Optional[dict] = None) -> dict:
  """Loads the latent-upsampler weights via the central utils helper.

  Args:
    upsampler_model_path: HF repo id or local path containing a
      ``latent_upsampler`` subfolder (e.g. ``model.safetensors`` inside it).
    eval_shapes: Optional pytree of abstract shapes used when restoring the
      params; ``None`` loads without shape constraints.

  Returns:
    A dict (Flax param pytree) holding the upsampler weights.
  """
  max_logging.log("Loading Latent Upsampler from checkpoint...")

  # NOTE(review): this uses `jax.devices()` but the diff's added imports do
  # not show `import jax` — confirm it is imported at module top.
  # Target the platform of the first visible device (e.g. "tpu", "cpu").
  flax_params = load_upsampler_weights(
      pretrained_model_name_or_path=upsampler_model_path,
      eval_shapes=eval_shapes,
      device=jax.devices()[0].platform,
      subfolder="latent_upsampler",
  )
  return flax_params
121+
122+
98123
def save_checkpoint(self, train_step, pipeline: LTX2Pipeline, train_states: dict):
99124
"""Saves the training state and model configurations."""
100125

src/maxdiffusion/compare.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import torch
2+
import jax
3+
import jax.numpy as jnp
4+
import numpy as np
5+
6+
# 1. ALIAS THE IMPORTS to prevent name collisions!
7+
from diffusers.pipelines.ltx2.latent_upsampler import LTX2LatentUpsamplerModel as PT_Upsampler
8+
from maxdiffusion.models.ltx2.latent_upsampler_ltx2 import LTX2LatentUpsamplerModel as JAX_Upsampler
9+
from maxdiffusion.models.ltx2.ltx2_utils import load_upsampler_weights
10+
11+
def test_side_by_side():
    """Numerically compares the PyTorch and JAX latent-upsampler models.

    Loads the same pretrained checkpoint into both implementations, feeds
    them an identical random input, and checks that the outputs agree to
    within 1e-3 max absolute error. Previously this only *printed* the
    verdict, so pytest reported a pass even when the models diverged; it
    now asserts on divergence.
    """
    # --- Setup PyTorch ---
    print("Initializing PyTorch Model...")
    # Load the real pretrained weights.
    pt_model = PT_Upsampler.from_pretrained("Lightricks/LTX-2", subfolder="latent_upsampler")
    pt_model.eval()

    # --- Setup JAX ---
    print("Initializing JAX Model...")
    jax_model = JAX_Upsampler()

    print("Loading JAX Weights from HuggingFace...")
    # Load the exact same checkpoint through the project's conversion path.
    flax_params = load_upsampler_weights(
        pretrained_model_name_or_path="Lightricks/LTX-2",
        eval_shapes=None,
        device="cpu",  # load onto CPU for the comparison
        subfolder="latent_upsampler",
    )

    # --- Generate Identical Dummy Data ---
    # Shape: Batch=1, Channels=128, Frames=8, Height=32, Width=32
    # (assumes 128 matches the upsampler's input channels — TODO confirm)
    print("Generating identical random inputs...")
    torch.manual_seed(42)
    pt_input = torch.randn(1, 128, 8, 32, 32, dtype=torch.float32)

    # PyTorch NCDHW -> JAX NDHWC: permute (B, C, F, H, W) -> (B, F, H, W, C).
    jax_input = jnp.array(pt_input.permute(0, 2, 3, 4, 1).numpy())

    # --- Run Forward Passes ---
    print("Running PyTorch pass...")
    with torch.no_grad():
        pt_output = pt_model(pt_input)

    print("Running JAX pass...")
    jax_output = jax_model.apply({'params': flax_params}, jax_input)

    # --- Compare Results ---
    # JAX NDHWC -> PyTorch NCDHW: permute (B, F, H, W, C) -> (B, C, F, H, W).
    # from_numpy avoids the extra copy that torch.tensor(np.array(...)) made.
    jax_output_converted = torch.from_numpy(np.asarray(jax_output)).permute(0, 4, 1, 2, 3)

    # Mean squared error and worst-case elementwise difference.
    mse = torch.nn.functional.mse_loss(pt_output, jax_output_converted)
    max_diff = (pt_output - jax_output_converted).abs().max()

    print("\n" + "=" * 30)
    print(" COMPARISON RESULTS ")
    print("=" * 30)
    print(f"Mean Squared Error: {mse.item():.8f}")
    print(f"Max Absolute Error: {max_diff.item():.8f}")

    if max_diff.item() < 1e-3:
        print("\n✅ SUCCESS: The models are mathematically identical!")
    else:
        print("\n❌ FAILED: The models diverge. There is a bug in the math/weights.")

    # Fail the test on divergence (the fix: printing alone let CI pass).
    assert max_diff.item() < 1e-3, f"PyTorch/JAX outputs diverge: max abs diff {max_diff.item():.8f}"
74+
# Allow running the comparison as a standalone script; the test_ prefix also
# makes it discoverable by pytest.
if __name__ == "__main__":
    test_side_by_side()

src/maxdiffusion/configs/ltx2_video.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ names_which_can_be_saved: []
99
names_which_can_be_offloaded: []
1010
remat_policy: "NONE"
1111

# Keep the JAX compilation-cache dir unset in the shared default config;
# point it at a local disk via a per-run override instead of committing a
# machine-specific path (the diff committed '/mnt/disks/mehdy-disk1/...').
jax_cache_dir: ''
1313
weights_dtype: 'bfloat16'
1414
activations_dtype: 'bfloat16'
1515

@@ -92,3 +92,12 @@ jit_initializers: True
9292
enable_single_replica_ckpt_restoring: False
9393
seed: 0
9494
audio_format: "s16"
# LTX-2 Latent Upsampler
# Toggle for running the latent-upsampler stage of the pipeline.
run_latent_upsampler: False
# HF repo id or local path containing the `latent_upsampler` subfolder.
upsampler_model_path: "Lightricks/LTX-2"
upsampler_spatial_patch_size: 1
upsampler_temporal_patch_size: 1
# 0.0 presumably disables AdaIN blending / tone-map compression — confirm
# against the upsampler implementation's semantics.
upsampler_adain_factor: 0.0
upsampler_tone_map_compression_ratio: 0.0
# Spatial upscale factor applied by the upsampler (2.0 = double resolution
# — assumption from the name; verify against the model code).
upsampler_rational_spatial_scale: 2.0

0 commit comments

Comments
 (0)