diff --git a/bitmind/__init__.py b/bitmind/__init__.py
index 09fc5e90..b0552aa1 100644
--- a/bitmind/__init__.py
+++ b/bitmind/__init__.py
@@ -18,7 +18,7 @@
 # DEALINGS IN THE SOFTWARE.

-__version__ = "2.2.8"
+__version__ = "2.2.9"
 version_split = __version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))
diff --git a/bitmind/synthetic_data_generation/prompt_generator.py b/bitmind/synthetic_data_generation/prompt_generator.py
index a8b8bc09..cd7823e6 100644
--- a/bitmind/synthetic_data_generation/prompt_generator.py
+++ b/bitmind/synthetic_data_generation/prompt_generator.py
@@ -118,6 +118,7 @@ def clear_gpu(self) -> None:
     def generate(
         self,
         image: Image.Image,
+        task: Optional[str] = None,
         max_new_tokens: int = 20,
         verbose: bool = False
     ) -> str:
@@ -127,6 +128,8 @@ def generate(

         Args:
             image: The image for which the description is to be generated.
+            task: The generation task ('t2i', 't2v', 'i2i', 'i2v'). If video task,
+                motion descriptions will be added.
             max_new_tokens: The maximum number of tokens to generate for each prompt.
             verbose: If True, additional logging information is printed.

@@ -185,7 +188,10 @@ def generate(
             description += '.'

         moderated_description = self.moderate(description)
-        return self.enhance(moderated_description)
+
+        if task in ['t2v', 'i2v']:
+            return self.enhance(moderated_description)
+        return moderated_description

     def moderate(self, description: str, max_new_tokens: int = 80) -> str:
         """
@@ -233,34 +239,28 @@ def enhance(self, description: str, max_new_tokens: int = 80) -> str:
         """
         Enhance a static image description to make it suitable for video
         generation by adding dynamic elements and motion.
-
+
         Args:
             description: The static image description to enhance.
             max_new_tokens: Maximum number of new tokens to generate in the enhanced text.
-
+
         Returns:
-            An enhanced description suitable for video generation, or the original
-            description if enhancement fails.
+            An enhanced description suitable for video generation.
         """
         messages = [
             {
                 "role": "system",
                 "content": (
-                    "[INST]You are an expert at converting static image descriptions "
-                    "into dynamic video prompts. Enhance the given description by "
-                    "adding natural motion and temporal elements while preserving the "
-                    "core scene. Follow these rules:\n"
-                    "1. Maintain the essential elements of the original description\n"
-                    "2. Add smooth, continuous motions that work well in video\n"
-                    "3. For portraits: Add natural facial movements or expressions\n"
-                    "4. For non-portrait images with people: Add contextually appropriate "
-                    "actions (e.g., for a beach scene, people might be walking along "
-                    "the shoreline or playing in the waves; for a cafe scene, people "
-                    "might be sipping drinks or engaging in conversation)\n"
-                    "5. For landscapes: Add environmental motion like wind or water\n"
-                    "6. For urban scenes: Add dynamic elements like people or traffic\n"
-                    "7. Keep the description concise but descriptive\n"
-                    "8. Focus on gradual, natural transitions\n"
+                    "[INST]You are an expert at converting image descriptions into video prompts. "
+                    "Analyze the existing motion in the scene and enhance it naturally:\n"
+                    "1. If motion exists in the image (falling, throwing, running, etc.):\n"
+                    "   - Maintain and emphasize that existing motion\n"
+                    "   - Add smooth continuation of the movement\n"
+                    "2. If the subject is static (sitting, standing, placed):\n"
+                    "   - Keep it stable\n"
+                    "   - Add minimal environmental motion if appropriate\n"
+                    "3. Add ONE subtle camera motion that complements the scene\n"
+                    "4. Keep the description concise and natural\n"
                     "Only respond with the enhanced description.[/INST]"
                 )
             },
@@ -280,5 +280,5 @@ def enhance(self, description: str, max_new_tokens: int = 80) -> str:
             return enhanced_text[0]['generated_text']
         except Exception as e:
-            print(f"An error occurred during motion enhancement: {e}")
-            return description
+            bt.logging.error(f"An error occurred during motion enhancement: {e}")
+            return description
\ No newline at end of file
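Note on the prompt_generator.py change above: `generate()` now threads a `task` argument through to decide whether the moderated caption gets the motion-enhancement pass, so image tasks keep a static caption while video tasks ('t2v', 'i2v') get motion added. A minimal sketch of that gating, with stand-in logic in place of the real `moderate()`/`enhance()` LLM pipelines (the string handling here is illustrative only):

```python
# Minimal sketch of the task-gated caption flow; the real PromptGenerator wraps
# LLM pipelines, so moderate()/enhance() are replaced with simple stand-ins here.
from typing import Optional

VIDEO_TASKS = {"t2v", "i2v"}

def build_prompt(description: str, task: Optional[str] = None) -> str:
    moderated = description.strip()  # stand-in for self.moderate(description)
    if task in VIDEO_TASKS:
        # stand-in for self.enhance(moderated): motion is added only for video tasks
        return moderated + " The camera slowly pans across the scene."
    return moderated  # image tasks ('t2i', 'i2i', None) keep the static caption

print(build_prompt("A lighthouse on a rocky coast", task="i2v"))
print(build_prompt("A lighthouse on a rocky coast", task="t2i"))
```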
diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py
index 19b60607..45eab8e4 100644
--- a/bitmind/synthetic_data_generation/synthetic_data_generator.py
+++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py
@@ -24,6 +24,7 @@
     T2V_MODEL_NAMES,
     T2I_MODEL_NAMES,
     I2I_MODEL_NAMES,
+    I2V_MODEL_NAMES,
     TARGET_IMAGE_SIZE,
     select_random_model,
     get_task,
@@ -152,7 +153,12 @@ def batch_generate(self, batch_size: int = 5) -> None:
             image_sample = self.image_cache.sample()
             images.append(image_sample['image'])
             bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: {image_sample['path']}")
-            prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1))
+            task = get_task(self.model_name) if self.model_name else None
+            prompts.append(self.generate_prompt(
+                image=image_sample['image'],
+                clear_gpu=i==batch_size-1,
+                task=task
+            ))
             bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}")

         # If specific model is set, use only that model
@@ -163,9 +169,12 @@ def batch_generate(self, batch_size: int = 5) -> None:
             i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES))
             t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES))
             t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES))
+            i2v_model_names = random.sample(I2V_MODEL_NAMES, len(I2V_MODEL_NAMES))
+
             model_names = [
-                m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names)
-                for m in triple if m is not None
+                m for quad in zip_longest(t2v_model_names, t2i_model_names,
+                                          i2i_model_names, i2v_model_names)
+                for m in quad if m is not None
             ]

         # Generate for each model/prompt combination
@@ -222,7 +231,7 @@ def generate(
             ValueError: If real_image is None when using annotation prompt type.
             NotImplementedError: If prompt type is not supported.
         """
-        prompt = self.generate_prompt(image, clear_gpu=True)
+        prompt = self.generate_prompt(image, clear_gpu=True, task=task)
         bt.logging.info("Generating synthetic data...")
         gen_data = self._run_generation(prompt, task, model_name, image)
         self.clear_gpu()
@@ -231,7 +240,8 @@ def generate(
     def generate_prompt(
         self,
         image: Optional[Image.Image] = None,
-        clear_gpu: bool = True
+        clear_gpu: bool = True,
+        task: Optional[str] = None
     ) -> str:
         """Generate a prompt based on the specified strategy."""
         bt.logging.info("Generating prompt")
@@ -241,7 +251,7 @@ def generate_prompt(
                     "image can't be None if self.prompt_type is 'annotation'"
                 )
             self.prompt_generator.load_models()
-            prompt = self.prompt_generator.generate(image)
+            prompt = self.prompt_generator.generate(image, task=task)
            if clear_gpu:
                 self.prompt_generator.clear_gpu()
         else:
@@ -261,9 +271,9 @@ def _run_generation(

         Args:
             prompt: The text prompt used to inspire the generation.
-            task: The generation task type ('t2i', 't2v', 'i2i', or None).
+            task: The generation task type ('t2i', 't2v', 'i2i', 'i2v', or None).
             model_name: Optional model name to use for generation.
-            image: Optional input image for image-to-image generation.
+            image: Optional input image for image-to-image or image-to-video generation.
             generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions.

         Returns:
@@ -272,6 +282,10 @@ def _run_generation(
         Raises:
             RuntimeError: If generation fails.
         """
+        # Clear CUDA cache before loading model
+        torch.cuda.empty_cache()
+        gc.collect()
+
         self.load_model(model_name)
         model_config = MODELS[self.model_name]
         task = get_task(model_name) if task is None else task
@@ -289,14 +303,38 @@ def _run_generation(
             gen_args['mask_image'], mask_center = create_random_mask(image.size)
             gen_args['image'] = image
+        # prep image-to-video generation args
+        elif task == 'i2v':
+            if image is None:
+                raise ValueError("image cannot be None for image-to-video generation")
+            # Get target size from gen_args if specified, otherwise use default
+            target_size = (
+                gen_args.get('height', 768),
+                gen_args.get('width', 768)
+            )
+            if image.size[0] > target_size[0] or image.size[1] > target_size[1]:
+                image = image.resize(target_size, Image.Resampling.LANCZOS)
+            gen_args['image'] = image

         # Prepare generation arguments
         for k, v in gen_args.items():
             if isinstance(v, dict):
                 if "min" in v and "max" in v:
-                    gen_args[k] = np.random.randint(v['min'], v['max'])
+                    # For i2v, use minimum values to save memory
+                    if task == 'i2v':
+                        gen_args[k] = v['min']
+                    else:
+                        gen_args[k] = np.random.randint(v['min'], v['max'])
                 if "options" in v:
                     gen_args[k] = random.choice(v['options'])
+            # Ensure num_frames is always an integer
+            if k == 'num_frames' and isinstance(v, dict):
+                if "min" in v:
+                    gen_args[k] = v['min']
+                elif "max" in v:
+                    gen_args[k] = v['max']
+                else:
+                    gen_args[k] = 24  # Default value

         try:
             if generate_at_target_size:
@@ -307,6 +345,10 @@ def _run_generation(
                 gen_args['width'] = gen_args['resolution'][1]
                 del gen_args['resolution']

+            # Ensure num_frames is an integer before generation
+            if 'num_frames' in gen_args:
+                gen_args['num_frames'] = int(gen_args['num_frames'])
+
             truncated_prompt = truncate_prompt_if_too_long(prompt, self.model)
             bt.logging.info(f"Generating media from prompt: {truncated_prompt}")
             bt.logging.info(f"Generation args: {gen_args}")
@@ -321,8 +363,14 @@ def _run_generation(
                 pretrained_args = model_config.get('from_pretrained_args', {})
                 torch_dtype = pretrained_args.get('torch_dtype', torch.bfloat16)
                 with torch.autocast(self.device, torch_dtype, cache_enabled=False):
+                    # Clear CUDA cache before generation
+                    torch.cuda.empty_cache()
+                    gc.collect()
                     gen_output = generate(truncated_prompt, **gen_args)
             else:
+                # Clear CUDA cache before generation
+                torch.cuda.empty_cache()
+                gc.collect()
                 gen_output = generate(truncated_prompt, **gen_args)
             gen_time = time.time() - start_time

@@ -334,6 +382,8 @@ def _run_generation(
                 f"default dimensions. Error: {e}"
             )
             try:
+                # Clear CUDA cache before retry
+                torch.cuda.empty_cache()
                 gen_output = self.model(prompt=truncated_prompt)
                 gen_time = time.time() - start_time
             except Exception as fallback_error:
@@ -461,5 +511,4 @@ def clear_gpu(self) -> None:
             del self.model
             self.model = None
         gc.collect()
-        torch.cuda.empty_cache()
-
+        torch.cuda.empty_cache()
\ No newline at end of file
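The argument-resolution changes in `_run_generation` above do two things: ranged args (dicts with "min"/"max") collapse to their minimum when the task is 'i2v' to keep VRAM use down, and `num_frames` is coerced to a plain int before the pipeline call. A self-contained sketch of that resolution step; the example `gen_args` values are hypothetical, not taken from the repo's model configs:

```python
# Sketch of the per-task resolution of ranged generation args; mirrors the
# logic added to _run_generation, but operates on a copy for clarity.
import random
import numpy as np

def resolve_gen_args(gen_args: dict, task: str) -> dict:
    resolved = dict(gen_args)
    for k, v in gen_args.items():
        if isinstance(v, dict):
            if "min" in v and "max" in v:
                # i2v runs at the low end of the range to save memory
                resolved[k] = v["min"] if task == "i2v" else int(np.random.randint(v["min"], v["max"]))
            if "options" in v:
                resolved[k] = random.choice(v["options"])
    if "num_frames" in resolved:
        resolved["num_frames"] = int(resolved["num_frames"])  # pipelines expect a plain int
    return resolved

print(resolve_gen_args(
    {"num_inference_steps": {"min": 40, "max": 60}, "num_frames": {"min": 24, "max": 48}},
    task="i2v"
))
```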
diff --git a/bitmind/validator/challenge.py b/bitmind/validator/challenge.py
index 1466cbd2..9cca7724 100644
--- a/bitmind/validator/challenge.py
+++ b/bitmind/validator/challenge.py
@@ -13,7 +13,7 @@
 from bitmind.utils.uids import get_random_uids
 from bitmind.validator.reward import get_rewards
 from bitmind.validator.config import (
-    TARGET_IMAGE_SIZE, 
+    TARGET_IMAGE_SIZE,
     MIN_FRAMES,
     MAX_FRAMES,
     P_STITCH,
@@ -136,7 +136,7 @@ def sample_video_frames(self, video_cache):
             sample['video'] = sample_A['video'] + sample_B['video']

         return sample
-    
+
     def process_metadata(self, sample) -> bool:
         """Prepare challenge metadata and media for logging to Weights & Biases """
         self.metadata = {
@@ -179,4 +179,4 @@ def create_wandb_video(video_frames, fps):
         except Exception as e:
             bt.logging.error(e)
             bt.logging.error(f"{self.modality} is truncated or corrupt. Challenge skipped.")
-            return False
+            return False
\ No newline at end of file
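The challenge.py hunks above are whitespace cleanup, but the surrounding context (the MIN_FRAMES / MAX_FRAMES / P_STITCH imports and the `sample_A['video'] + sample_B['video']` line) shows that video challenges can be stitched from two sampled clips. A toy sketch of that idea; the constants, helper names, and sampling policy here are assumptions, not the repo's actual implementation:

```python
# Illustrative only: the real sample_video_frames pulls clips from the
# validator's video cache; values and helper names below are placeholders.
import random

MIN_FRAMES, MAX_FRAMES, P_STITCH = 8, 24, 0.5  # placeholder values

def sample_video(sample_clip) -> dict:
    """Return one clip, or two clips concatenated with probability P_STITCH."""
    num_frames = random.randint(MIN_FRAMES, MAX_FRAMES)
    sample = sample_clip(num_frames)
    if random.random() < P_STITCH:
        sample_b = sample_clip(num_frames)
        sample = {'video': sample['video'] + sample_b['video']}
    return sample

# toy clip source: each "frame" is just an index
print(len(sample_video(lambda n: {'video': list(range(n))})['video']))
```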
diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py
index 9d7e3333..fc56eefb 100644
--- a/bitmind/validator/config.py
+++ b/bitmind/validator/config.py
@@ -17,7 +17,9 @@
     EulerDiscreteScheduler,
     DEISMultistepScheduler,
     AutoPipelineForInpainting,
-    StableDiffusionInpaintPipeline
+    StableDiffusionInpaintPipeline,
+    CogView4Pipeline,
+    CogVideoXImageToVideoPipeline
 )

 from .model_utils import (
@@ -114,7 +116,8 @@ class Modality(StrEnum):
         {"path": "bitmind/lfw"},
         {"path": "bitmind/caltech-256"},
         {"path": "bitmind/caltech-101"},
-        {"path": "bitmind/dtd"}
+        {"path": "bitmind/dtd"},
+        {"path": "bitmind/idoc-mugshots-images"}
     ],
     "semisynthetic": [
         {"path": "bitmind/face-swap"}
     ],
@@ -143,6 +146,21 @@ class Modality(StrEnum):

 # Text-to-image model configurations
 T2I_MODELS: Dict[str, Dict[str, Any]] = {
+    "THUDM/CogView4-6B": {
+        "pipeline_cls": CogView4Pipeline,
+        "from_pretrained_args": {
+            "torch_dtype": torch.bfloat16,
+            "use_safetensors": True
+        },
+        "generate_args": {
+            "guidance_scale": 3.5,
+            "num_images_per_prompt": 1,
+            "num_inference_steps": 50,
+            "width": 512,
+            "height": 512
+        },
+        "use_autocast": False
+    },
     "stabilityai/stable-diffusion-xl-base-1.0": {
         "pipeline_cls": StableDiffusionXLPipeline,
         "from_pretrained_args": {
@@ -304,12 +322,6 @@ class Modality(StrEnum):
         "scheduler": {
             "cls": DEISMultistepScheduler
         }
-    },
-    "stable-diffusion-v1-5/stable-diffusion-inpainting": {
-        "pipeline_cls": StableDiffusionInpaintPipeline,
-        "generate_args": {
-            "num_inference_steps": {"min": 40, "max": 60},
-        }
     }
 }
 I2I_MODEL_NAMES: List[str] = list(I2I_MODELS.keys())
@@ -407,13 +419,16 @@ class Modality(StrEnum):
 }
 T2V_MODEL_NAMES: List[str] = list(T2V_MODELS.keys())

+# Image-to-video model configurations
+I2V_MODELS: Dict[str, Dict[str, Any]] = {}
+I2V_MODEL_NAMES: List[str] = list(I2V_MODELS.keys())
+
 # Combined model configurations
-MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS}
+MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS, **I2V_MODELS}
 MODEL_NAMES: List[str] = list(MODELS.keys())

-
 def get_modality(model_name):
-    if model_name in T2V_MODEL_NAMES:
+    if model_name in T2V_MODEL_NAMES + I2V_MODEL_NAMES:
         return Modality.VIDEO
     elif model_name in T2I_MODEL_NAMES + I2I_MODEL_NAMES:
         return Modality.IMAGE
@@ -421,7 +436,7 @@ def get_modality(model_name):
 def get_output_media_type(model_name):
     if model_name in I2I_MODEL_NAMES:
         return MediaType.SEMISYNTHETIC
-    elif model_name in T2I_MODEL_NAMES + T2V_MODEL_NAMES:
+    elif model_name in T2I_MODEL_NAMES + T2V_MODEL_NAMES + I2V_MODEL_NAMES:
         return MediaType.SYNTHETIC

 def get_task(model_name):
@@ -431,15 +446,17 @@ def get_task(model_name):
         return 't2i'
     elif model_name in I2I_MODEL_NAMES:
         return 'i2i'
+    elif model_name in I2V_MODEL_NAMES:
+        return 'i2v'

 def select_random_model(task: Optional[str] = None) -> str:
     """
-    Select a random text-to-image or text-to-video model based on the specified
+    Select a random text-to-image, text-to-video, image-to-image, or image-to-video model based on the specified
     modality.

     Args:
-        modality: The type of model to select ('t2v', 't2i', 'i2i', or 'random').
+        modality: The type of model to select ('t2v', 't2i', 'i2i', 'i2v', or 'random').
             If None or 'random', randomly chooses between the valid options

     Returns:
@@ -457,5 +474,9 @@ def select_random_model(task: Optional[str] = None) -> str:
         return np.random.choice(T2V_MODEL_NAMES)
     elif task == 'i2i':
         return np.random.choice(I2I_MODEL_NAMES)
+    elif task == 'i2v':
+        if not I2V_MODEL_NAMES:
+            raise NotImplementedError("I2V models are not currently configured")
+        return np.random.choice(I2V_MODEL_NAMES)
     else:
         raise NotImplementedError(f"Unsupported task: {task}")
\ No newline at end of file
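config.py now imports CogVideoXImageToVideoPipeline and routes the (currently empty) I2V_MODELS dict through MODELS, get_modality, get_output_media_type, get_task, and select_random_model, so enabling image-to-video generation only requires adding an entry. A hypothetical entry is sketched below; the model id and generation arguments are assumptions and are not part of this diff:

```python
# Hypothetical I2V entry, not included in this change set. As shipped,
# I2V_MODELS is empty and select_random_model('i2v') raises NotImplementedError.
import torch
from diffusers import CogVideoXImageToVideoPipeline

I2V_MODELS = {
    "THUDM/CogVideoX-5b-I2V": {  # assumed model id
        "pipeline_cls": CogVideoXImageToVideoPipeline,
        "from_pretrained_args": {
            "torch_dtype": torch.bfloat16,
            "use_safetensors": True
        },
        "generate_args": {
            "guidance_scale": 6.0,                        # assumed values
            "num_inference_steps": {"min": 30, "max": 50},
            "num_frames": 49,
            "height": 480,
            "width": 720
        },
        "use_autocast": False
    }
}
I2V_MODEL_NAMES = list(I2V_MODELS.keys())
```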
diff --git a/bitmind/validator/verify_models.py b/bitmind/validator/verify_models.py
index ff79c295..ff80c9e0 100644
--- a/bitmind/validator/verify_models.py
+++ b/bitmind/validator/verify_models.py
@@ -23,10 +23,10 @@ def is_model_cached(model_name):

     # Check if the model directory exists
     if os.path.isdir(model_path):
-        bt.logging.info(f"{model_name} is in HF cache. Skipping....")
+        print(f"{model_name} is in HF cache. Skipping....")
        return True
     else:
-        bt.logging.info(f"{model_name} is not cached. Downloading....")
+        print(f"{model_name} is not cached. Downloading....")
         return False

diff --git a/requirements.txt b/requirements.txt
index eb68fdf4..947e04ef 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,7 @@ scikit-learn==1.5.2

 # Deep learning tools
 transformers==4.48.0
-diffusers==0.32.2
+diffusers==0.33.1
 accelerate==1.2.0
 bitsandbytes==0.45.0
 sentencepiece==0.2.0
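verify_models.py only switches its cache-check messages from bt.logging.info to print. For reference, a standalone sketch of the same kind of check against the standard Hugging Face hub cache layout (~/.cache/huggingface/hub/models--{org}--{name}); the repo's own is_model_cached may compute model_path differently:

```python
# Sketch of an HF-cache presence check; assumes the default hub cache layout
# and HF_HOME fallback, which may differ from the repo's actual helper.
import os

def is_model_cached(model_name: str) -> bool:
    hf_home = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
    model_path = os.path.join(hf_home, "hub", "models--" + model_name.replace("/", "--"))
    if os.path.isdir(model_path):
        print(f"{model_name} is in HF cache. Skipping....")
        return True
    print(f"{model_name} is not cached. Downloading....")
    return False

print(is_model_cached("THUDM/CogView4-6B"))
```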