From 59653b9ba6e17db306aa48a5c5b9436dfa81515d Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 1 Apr 2025 11:38:10 -0700 Subject: [PATCH 01/15] Release 2.2.8 (#180) * Validator Proxy Response Update (#103) * adding rich arg, adding coldkeys and hotokeys * moving rich to payload from headers * bump version --------- Co-authored-by: benliang99 * Two new image models: SDXL finetuned on Midjourney, and SD finetuned on anime images * Added required StableDiffusionPipeline import * Updated transformers version to fix tokenizer initialization error * GPU Specification (#108) * Made gpu id specification consistent across synthetic image generation models * Changed gpu_id to device * Docstring grammar * add neuron.device to SyntheticImageGenerator init * Fixed variable names * adding device to start_validator.sh * deprecating old/biased random prompt generation * properly clear gpu of moderation pipeline * simplifying usage of self.device * fixing moderation pipeline device * explicitly defining model/tokenizer for moderation pipeline to avoid accelerate auto device management * deprecating random prompt generation --------- Co-authored-by: benliang99 * Update __init__.py bump version * removing logging * old logging removed * adding check for state file in case it is deleted somehow * removing remaining random prompt generation code * [Testnet] Video Challenges V1 (#111) * simple video challenge implementation wip * dummy multimodal miner * constants reorg * updating verify_models script with t2v * fixing MODEL_PIPELINE init * cleanup * __init__.py * hasattr fix * num_frames must be divisible by 8 * fixing dict iteration * dummy response for videos * fixing small bugs * fixing video logging and compression * apply image transforms uniformly to frames of video * transform list of tensor to pil for synapse prep * cleaning up vali forward * miner function signatures to use Synapse base class instead of ImageSynapse * vali requirements imageio and moviepy * attaching separate video and image forward functions * separating blacklist and priority fns for image/video synapses * pred -> prediction * initial synth video challenge flow * initial video cache implementation * video cache cleanup * video zip downloads * wip fairly large refactor of data generation, functionality and form * generalized hf zip download fn * had claude improve video_cache formatting * vali forward cleanup * cleanup + turning back on randomness for real/fake * fix relative import * wip moving video datasets to vali config * Adding optimization flags to vali config * check if captioning model already loaded * async SyntheticDataGenerator wip * async zip download * ImageCache wip * proper gpu clearing for moderation pipeline * sdg cleanup * new cache system WIP * image/video cache updates * cleaning up unused metadata arg, improving logging * fixed frame sampling, parquet image extraction, image sampling * synth data cache wip * Moving sgd to its own pm2 process * synthetic data gen memory management update * mochi-1-preview * util cleanup, new requirements * ensure SyntheticDataGenerator process waits for ImageCache to populate * adding new t2i models from main * Fixing t2v model output saving * miner cleanup * Moving tall model weights to bitmind hf org * removing test video pkl * fixing circular import * updating usage of hf_hub_download according to some breaking huggingface_hub changes * adding ffmpeg to vali reqs * adding back in video models in async generation after testing * renaming UCF directory to DFB, since it now 
contains TALL * remaining renames for UCF -> DFB * pyffmpegg * video compatible data augmentations * Default values for level, data_aug_params for failure case * switching image challenges back on * using sample variable to store data for all challenge types * disabling sequential_cpu_offload for CogVideoX5b * logging metadata fields to w&b * log challenge metadata * bump version * adding context manager for generation w different dtypes * variable name fix in ComposeWithTransforms * fixing broken DFB stuff in tall_detector.py * removing unnecessary logging * fixing outdated variable names * cache refactor; moving shared functionality to BaseCache * finally automating w&b project setting * improving logs * improving validator forward structure * detector ABC cleanup + function headers * adding try except for miner performance history loading * fixing import * cleaning up vali logging * pep8 formatting video_utils * cleaning up start_validator.sh, starting validator process before data gen * shortening vali challenge timer * moving data generation management to its own script & added w&B logging * run_data_generator.py * fixing full_path variable name * changing w&b name for data generator * yaml > json gang * simplifying ImageCache.sample to always return one sample * adding option to skip a challenge if no data are available in cache * adding config vars for image/video detector * cleaning up miner class, moving blacklist/priority to base * updating call to image_cache.sample() * fixing mochi gen to 84 frames * fixing video data padding for miners * updating setup script to create new .env file * fixing weight loading after detector refactor * model/detector separation for TALL & modifying base DFB code to allow device configuration * standardizing video detector input to a frames tensor * separation of concerns; moving all video preprocessing to detector class * pep8 cleanup * reformatting if statements * temporarily removing initial dataset class * standardizing config loading across video and image models * finished VideoDataloader and supporting components * moved save config file out of trian script * backwards compatibility for ucf training * moving data augmentation from RealFakeDataset to Dataset subclasses for video aug support * cleaning up data augmentation and target_image_size * import cleanup * gitignore update * fixing typos picked up by flake8 * fixing function name ty flake8 * fixing test fixtures * disabling pytests for now, some are broken after refactor and its 4am * fixing image_size for augmentations * Updated validator gpu requirements (#113) * splitting rewards over image and video (#112) * Update README.md (#110) * combining requirements files * Combined requirements installation * Improved formatting, added checks to prevent overwriting existing .env files. 
* Re-added endpoint options * Fixed incorrect diffusers install * Fixed missing initialization of miner performance trackers * [Testnet] Docs Updates (#114) * docs updates * mining docs update * Removed deprecated requirements files from github tests (#118) * [Testnet] Async Cache Updates (#119) * breaking out cache updates into their own process * adding retries for loading vali info * moving device config to data generation process * typo * removing old run_updater init arg, fixing dataset indexing * only download 1 zip to start to provide data for vali on first boot * cache deletion functionality * log cache size * name images with dataset prefix * Increased minimum and recommended storage (#120) * [Testnet] Data download cleanup (#121) * moving download_data.py to base_miner/datasets * removing unused args in download_data * constants -> config * docs updates for new paths * updating outdated fn headers * pep8 * use png codec, sample by framerate + num frames * fps, min_fps, max_fps parameterization of sample * return fps and num frames * Fix registry module imports (#123) * Fix registry module imports * Fixing config loading issues * fixing frame sampling * bugfix * print label on testnet * reenabling model verification * update detector class names * Fixing config_name arg for camo * fixing detector config in camo * fixing ref to self.config_name * update default frame rate * video dataset creation example * default config for video datasets * update default num_videos --------- Co-authored-by: Andrew * Update README.md * README title * removing samples from cache * README * fixing cache removal (#125) * Fixed tensor not being set to device for video challenges, causing errors when using cuda (#126) * Mainnet Prep (#127) * resetting challenge timer to 60s * fix logging for miner history loading * randomize model order, log gen time * remove frame limit * separate logging to after data check * generate with batch=1 first for diverse data availability * load v1 history path for smooth transition to new incentive * prune extracted cache * swapping url open-images for jpg * removing unused config args * shortening cache refresh timer * cache optimizations * typo * better variable naming * default to autocast * log num files in cache along with GB * surfacing max size gb variables * cooked typo * Fixed wrong validation split key string causing no transform to be applied * Changed detector arg to be required * fixing hotkey reset check * removing logline * clamp mcc at 0 so video doesn't negatively impact performant image miners * typo * improving cache logs * prune after clear * only update relevant tracker in reward * improved logging, turned off cache removal in sample() --------- Co-authored-by: Andrew * removing old reqs from autoupdate * Re-added bitmind HF org prefix to dataset path * shortening self heal timer * autoupdate * autoupdate * sample size * Validator Improvements: VRAM usage, logging (#131) * ensure vali process and cache update process do not consume any vram * skip challenge if unable to create wandb Image/Video object (indicating corrupt file) * manually set log level to info * removing debug print * enable_info in config * cleanup * version bump * moved info log setting to config.py * Bittensor 8.5.1 (#133) * bittensor 8.5.1 * bump package version * Prompt Generation Pipeline Improvements (#135) * Release 2.0.3 (#134) Bittensor 8.5.1 * enhancing prompts by adding conveyed motion with llama * Mining docs fix setup_miner_env.sh -> setup_env.sh * [testnet] I2i/in
painting (#137) * Initial i2i constants for in-painting * Initial in-painting functionality with mask (oval/rectangle) and annotation generation * Refactor ipg to match sdg format, added caching and support for selecting from multiple in-painting models * Fixed cache import, updated test script * Separate cache for i2i when using run_data_generator * Renamed synth cache constants, added support for multiple validator synth caches, and selection between i2i (20%) and t2i (80%) in forward * Unifying InPaintingGenerator and SyntheticDataGenerator (#136) * WIP, unifying InPaintingGenerator and SyntheticDataGenerator * minor simplification of forward flow * simplifying forward flow * standardizing cache structures with the introduction of task type subdirs * adding i2i models to batch generation * removing deprecated InPaintingGenerator from run script * adding --clear-cache option for validator * updating SDG init params * fixing last imports + directory structure references * fixing images passed to generate function for i2i * option to log masks/original images for i2i challenges * fixing help hint for output-dir --------- Co-authored-by: Andrew * Updated image_annotation_generator to prompt_generator (#138) * bump version 2.0.3 -> 2.1.0 * testing cache clearing via autoupdate * cranking up video rewards to .2 * Add DeepFloyd/IF model and multi-stage pipeline support Added DeepFloyd/IF-I-XL + IF-II-L model configuration, pipeline_stages configuration for multi-stage models * Moved multistage pipeline generator to SyntheticDataGenerator * Args for testing specific model * [TESTNET] HunyuanVideo (#140) * hunyuan video initial commit * delete resolution from from_pretrained_args after extracting h,w * model_id arg for from_pretrained * standardizing model_id usage * fixing autocast and torch_dtype for hunyuan * adding resolution options and save options for all t2v models * missing comma in config * Update __init__.py * updated subnet arch diagram * README wip * docs updates * README updates * README updates * more README updates * README updates * README updates * README cleanup * more README updates * Fixing table border removal html for github * fixing table html * one last attempt at a prettier table * one last last attempt at a prettier table * bumping video rewards * removing decay for unsampled miners * README cleanup * increasing suggested and min compute for validators * README update, markdown fix in Incentive.md * README tweak * removing redundant dereg check from update_scores * DeepFloyd-specific configs, args for better cache/data gen testing, multistage pipeline i/o * use largest DeepFloyd IF I and II models, ensure no watermarker * Fixed FLUX resolution format, added back model_id and scheduler loading for video models * Add Janus-Pro-7B t2i model with custom diffuser pipeline class * Janus repo install * Removed custom wrapper files, added Janus DiffusionPipeline wrapper to model_utils, cleaned up configs * Removed DiffusionPipeline import * Uncomment wandb inits * Move create_pipeline_generator() to model utils * Moved model optimizations to model utils * [Testnet] Multi-Video Challenges (#148) * Implementation of frame stitching for 2 videos * ComposeWithParams fix * vflip + hflip fix * wandb video logging fix courtesy of eric * proper arg passing for prompt moderation * version bump * i2i crop guardrails * Update config.py Removing problematic resolution for CogVideoX5b * explicit requirements install * moving pm2 process stopping prior to model verification * fix for no
available videos in multi-video challenge generation * Update forward.py Multi-video threshold 0.2 * [Testnet] Multiclass Rewards (#150) * multiclass protocols * multiclass rewards * facilitating smooth transition from old protocol to multiclass * DTAO: Bittensor SDK 9.0.0 (#152) * Update requirements.txt * version bump * moving prediction backwards compatibility to synapse.deserialize * mcc-based reward with rational transform * cast predictions to np array upon loading miner history * version bump * [Testnet] video organics (#151) * improved vali proxy with video endpoint * renaming endpoints * Fixing vali proxy initialization * make vali proxy async again * handling testnet situation of low miner activity * BytesIO import * upgrading transformers * switching to multipart form data * Validator Proxy handling of Multiclass Responses (#153) * update vali proxy to return floats instead of vectors * removing rational transform for now * new incentive docs (#154) * python-multipart * Semisynthetic Cache (#158) * new cache structure and related config vars * refactored vali forward to be more modular * cleanup * restructure wip * added dataset to cache dir hierarchy, cleaned up data classes, better error reporting for missing frames * fixing cache access order * bugfixes for semisynthetic image cache and safer pruning * config and logging cleanup * cache clear for this release --------- Co-authored-by: Dylan Uys * version bump * Changing multiclass reward weight to .25 * uncommenting dlib * bittensor==9.0.3 * Handling datasets with few files that don't need regular local updates * fixing error logging variable names * dreamshaper-8-inpainting (#161) * Vali/sd v15 inpainting (#162) * inpainting pipeline import * removing cache clear from autoupdate * eidon data * version bump * removing filetype key from bm-eidon-image * refresh cache * [Testnet] Broken Pipes Fix (#166) * improved dendrite class with proper connection pool management to deal with these pesky broken pipes * logging and indentation * version bump * updating connection pool config * removing cache clear * reward transform + logging updates * Added LORA model support * Added JourneyDB static synthetic dataset * Added GenImage Midjourney synthetic image dataset * Fixed dataset name * Added import for SYNTH_IMAGE_CACHE_DIR * Typo * merging scores fix to testnet * fix for augmented video logging (#177) * wandb cleanup * logging update, testing autoupdate * making wandb cache clears periodic regardless of autoupdate/self heal * move wandb cache cleaning to its own dir * typo * bump version * increasing min stake * version bump * typo * still no scores for blacklisted validators * leaving vpermit_tao_limit as default and adding explicitly to blacklist to avoid side effects --------- Co-authored-by: benliang99 Co-authored-by: Andrew Co-authored-by: Kenobi <108417131+kenobijon@users.noreply.github.com> Co-authored-by: Dylan Uys --- bitmind/__init__.py | 2 +- bitmind/base/miner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bitmind/__init__.py b/bitmind/__init__.py index e9974888..09fc5e90 100644 --- a/bitmind/__init__.py +++ b/bitmind/__init__.py @@ -18,7 +18,7 @@ # DEALINGS IN THE SOFTWARE.
-__version__ = "2.2.7" +__version__ = "2.2.8" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) diff --git a/bitmind/base/miner.py b/bitmind/base/miner.py index e63d70a1..a2228c30 100644 --- a/bitmind/base/miner.py +++ b/bitmind/base/miner.py @@ -236,7 +236,7 @@ async def blacklist( if self.config.blacklist.force_validator_permit: # If the config is set to force validator permit, then we should only allow requests from validators. - if not self.metagraph.validator_permit[uid]: + if not self.metagraph.validator_permit[uid] or self.metagraph.S[uid] < 30000: bt.logging.warning( f"Blacklisting a request from non-validator hotkey {synapse.dendrite.hotkey}" ) From 6209d93c0cd837cbc14ec29bb139b3be2f045d45 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Thu, 10 Apr 2025 12:12:30 +0000 Subject: [PATCH 02/15] feat(i2v): add image-to-video generation support with motion-enhanced prompts - Add CogVideoX1.5-5B-I2V model config - Enhance prompt generation with task-specific motion descriptions - Add memory optimizations for video generation - Update task handling across generator pipeline --- .../prompt_generator.py | 34 +++-- .../synthetic_data_generator.py | 118 ++++++++++++++++-- bitmind/validator/challenge.py | 2 +- bitmind/validator/config.py | 60 +++++++-- requirements.txt | 2 +- 5 files changed, 186 insertions(+), 30 deletions(-) diff --git a/bitmind/synthetic_data_generation/prompt_generator.py b/bitmind/synthetic_data_generation/prompt_generator.py index a8b8bc09..980df06d 100644 --- a/bitmind/synthetic_data_generation/prompt_generator.py +++ b/bitmind/synthetic_data_generation/prompt_generator.py @@ -118,6 +118,7 @@ def clear_gpu(self) -> None: def generate( self, image: Image.Image, + task: Optional[str] = None, max_new_tokens: int = 20, verbose: bool = False ) -> str: @@ -127,6 +128,8 @@ def generate( Args: image: The image for which the description is to be generated. + task: The generation task ('t2i', 't2v', 'i2i', 'i2v'). If video task, + motion descriptions will be added. max_new_tokens: The maximum number of tokens to generate for each prompt. verbose: If True, additional logging information is printed. @@ -185,7 +188,10 @@ def generate( description += '.' moderated_description = self.moderate(description) - return self.enhance(moderated_description) + + if task in ['t2v', 'i2v']: + return self.enhance(moderated_description) + return moderated_description def moderate(self, description: str, max_new_tokens: int = 80) -> str: """ @@ -251,16 +257,18 @@ def enhance(self, description: str, max_new_tokens: int = 80) -> str: "adding natural motion and temporal elements while preserving the " "core scene. Follow these rules:\n" "1. Maintain the essential elements of the original description\n" - "2. Add smooth, continuous motions that work well in video\n" - "3. For portraits: Add natural facial movements or expressions\n" - "4. For non-portrait images with people: Add contextually appropriate " - "actions (e.g., for a beach scene, people might be walking along " - "the shoreline or playing in the waves; for a cafe scene, people " - "might be sipping drinks or engaging in conversation)\n" - "5. For landscapes: Add environmental motion like wind or water\n" - "6. For urban scenes: Add dynamic elements like people or traffic\n" - "7. Keep the description concise but descriptive\n" - "8. Focus on gradual, natural transitions\n" + "2. 
Add EXACTLY ONE camera motion (e.g., 'camera slowly zooming in', " + "'camera gently panning left to right', 'camera smoothly circling')\n" + "3. Add EXACTLY ONE ambient motion based on the scene type:\n" + " - For portraits: Add subtle facial expressions or micro-movements\n" + " - For people: Add natural body language or gestures\n" + " - For landscapes: Add environmental motion (wind, clouds, water)\n" + " - For urban scenes: Add atmospheric elements (light, shadows)\n" + " - For objects: Add gentle environmental interaction\n" + "4. Keep the total description under 75 words\n" + "5. Format: '{original description}, {camera motion}, {ambient motion}'\n" + "6. Use smooth, gradual terms like 'gently', 'slowly', 'subtly'\n" + "7. NEVER add new subjects or major scene changes\n" "Only respond with the enhanced description.[/INST]" ) }, @@ -280,5 +288,5 @@ def enhance(self, description: str, max_new_tokens: int = 80) -> str: return enhanced_text[0]['generated_text'] except Exception as e: - print(f"An error occurred during motion enhancement: {e}") - return description + bt.logging.error(f"An error occurred during motion enhancement: {e}") + return description \ No newline at end of file diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 19b60607..0ebc89b9 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -152,7 +152,12 @@ def batch_generate(self, batch_size: int = 5) -> None: image_sample = self.image_cache.sample() images.append(image_sample['image']) bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: {image_sample['path']}") - prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) + task = get_task(self.model_name) if self.model_name else None + prompts.append(self.generate_prompt( + image=image_sample['image'], + clear_gpu=i==batch_size-1, + task=task + )) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") # If specific model is set, use only that model @@ -222,7 +227,7 @@ def generate( ValueError: If real_image is None when using annotation prompt type. NotImplementedError: If prompt type is not supported. """ - prompt = self.generate_prompt(image, clear_gpu=True) + prompt = self.generate_prompt(image, clear_gpu=True, task=task) bt.logging.info("Generating synthetic data...") gen_data = self._run_generation(prompt, task, model_name, image) self.clear_gpu() @@ -231,7 +236,8 @@ def generate( def generate_prompt( self, image: Optional[Image.Image] = None, - clear_gpu: bool = True + clear_gpu: bool = True, + task: Optional[str] = None ) -> str: """Generate a prompt based on the specified strategy.""" bt.logging.info("Generating prompt") @@ -241,7 +247,7 @@ def generate_prompt( "image can't be None if self.prompt_type is 'annotation'" ) self.prompt_generator.load_models() - prompt = self.prompt_generator.generate(image) + prompt = self.prompt_generator.generate(image, task=task) if clear_gpu: self.prompt_generator.clear_gpu() else: @@ -261,9 +267,9 @@ def _run_generation( Args: prompt: The text prompt used to inspire the generation. - task: The generation task type ('t2i', 't2v', 'i2i', or None). + task: The generation task type ('t2i', 't2v', 'i2i', 'i2v', or None). model_name: Optional model name to use for generation. - image: Optional input image for image-to-image generation. 
+ image: Optional input image for image-to-image or image-to-video generation. generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions. Returns: @@ -272,6 +278,10 @@ def _run_generation( Raises: RuntimeError: If generation fails. """ + # Clear CUDA cache before loading model + torch.cuda.empty_cache() + gc.collect() + self.load_model(model_name) model_config = MODELS[self.model_name] task = get_task(model_name) if task is None else task @@ -289,14 +299,38 @@ def _run_generation( gen_args['mask_image'], mask_center = create_random_mask(image.size) gen_args['image'] = image + # prep image-to-video generation args + elif task == 'i2v': + if image is None: + raise ValueError("image cannot be None for image-to-video generation") + # Get target size from gen_args if specified, otherwise use default + target_size = ( + gen_args.get('height', 768), + gen_args.get('width', 768) + ) + if image.size[0] > target_size[0] or image.size[1] > target_size[1]: + image = image.resize(target_size, Image.Resampling.LANCZOS) + gen_args['image'] = image # Prepare generation arguments for k, v in gen_args.items(): if isinstance(v, dict): if "min" in v and "max" in v: - gen_args[k] = np.random.randint(v['min'], v['max']) + # For i2v, use minimum values to save memory + if task == 'i2v': + gen_args[k] = v['min'] + else: + gen_args[k] = np.random.randint(v['min'], v['max']) if "options" in v: gen_args[k] = random.choice(v['options']) + # Ensure num_frames is always an integer + if k == 'num_frames' and isinstance(v, dict): + if "min" in v: + gen_args[k] = v['min'] + elif "max" in v: + gen_args[k] = v['max'] + else: + gen_args[k] = 24 # Default value try: if generate_at_target_size: @@ -307,6 +341,10 @@ def _run_generation( gen_args['width'] = gen_args['resolution'][1] del gen_args['resolution'] + # Ensure num_frames is an integer before generation + if 'num_frames' in gen_args: + gen_args['num_frames'] = int(gen_args['num_frames']) + truncated_prompt = truncate_prompt_if_too_long(prompt, self.model) bt.logging.info(f"Generating media from prompt: {truncated_prompt}") bt.logging.info(f"Generation args: {gen_args}") @@ -321,8 +359,14 @@ def _run_generation( pretrained_args = model_config.get('from_pretrained_args', {}) torch_dtype = pretrained_args.get('torch_dtype', torch.bfloat16) with torch.autocast(self.device, torch_dtype, cache_enabled=False): + # Clear CUDA cache before generation + torch.cuda.empty_cache() + gc.collect() gen_output = generate(truncated_prompt, **gen_args) else: + # Clear CUDA cache before generation + torch.cuda.empty_cache() + gc.collect() gen_output = generate(truncated_prompt, **gen_args) gen_time = time.time() - start_time @@ -334,6 +378,8 @@ def _run_generation( f"default dimensions. Error: {e}" ) try: + # Clear CUDA cache before retry + torch.cuda.empty_cache() gen_output = self.model(prompt=truncated_prompt) gen_time = time.time() - start_time except Exception as fallback_error: @@ -463,3 +509,61 @@ def clear_gpu(self) -> None: gc.collect() torch.cuda.empty_cache() + def generate_from_prompt( + self, + prompt: str, + task: Optional[str] = None, + image: Optional[Image.Image] = None, + generate_at_target_size: bool = False + ) -> Dict[str, Any]: + """Generate synthetic data based on a provided prompt. 
+ + Args: + prompt: The text prompt to use for generation + task: Optional task type ('t2i', 't2v', 'i2i', 'i2v') + image: Optional input image for i2i or i2v generation + generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions + + Returns: + Dictionary containing generated data information + """ + bt.logging.info(f"Generating synthetic data from provided prompt: {prompt}") + + # Default to t2i if task is not specified + if task is None: + task = 't2i' + + # If model_name is not specified, select one based on the task + if self.model_name is None and self.use_random_model: + bt.logging.warning(f"No model configured. Using random model.") + if task == 't2i': + model_candidates = T2I_MODEL_NAMES + elif task == 't2v': + model_candidates = T2V_MODEL_NAMES + elif task == 'i2i': + model_candidates = I2I_MODEL_NAMES + elif task == 'i2v': + model_candidates = I2V_MODEL_NAMES + else: + raise ValueError(f"Unsupported task: {task}") + + self.model_name = random.choice(model_candidates) + + # Validate input image for tasks that require it + if task in ['i2i', 'i2v'] and image is None: + raise ValueError(f"Input image is required for {task} generation") + + # Run the generation with the provided prompt + gen_data = self._run_generation( + prompt=prompt, + task=task, + model_name=self.model_name, + image=image, + generate_at_target_size=generate_at_target_size + ) + + # Clean up GPU memory + self.clear_gpu() + + return gen_data + diff --git a/bitmind/validator/challenge.py b/bitmind/validator/challenge.py index 1466cbd2..b9767cf5 100644 --- a/bitmind/validator/challenge.py +++ b/bitmind/validator/challenge.py @@ -13,7 +13,7 @@ from bitmind.utils.uids import get_random_uids from bitmind.validator.reward import get_rewards from bitmind.validator.config import ( - TARGET_IMAGE_SIZE, + TARGET_IMAGE_SIZE, MIN_FRAMES, MAX_FRAMES, P_STITCH, diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 9d7e3333..f9aaff33 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -17,7 +17,9 @@ EulerDiscreteScheduler, DEISMultistepScheduler, AutoPipelineForInpainting, - StableDiffusionInpaintPipeline + StableDiffusionInpaintPipeline, + CogView4Pipeline, + CogVideoXImageToVideoPipeline ) from .model_utils import ( @@ -114,7 +116,8 @@ class Modality(StrEnum): {"path": "bitmind/lfw"}, {"path": "bitmind/caltech-256"}, {"path": "bitmind/caltech-101"}, - {"path": "bitmind/dtd"} + {"path": "bitmind/dtd"}, + {"path": "bitmind/idoc-mugshots"} ], "semisynthetic": [ {"path": "bitmind/face-swap"} @@ -143,6 +146,21 @@ class Modality(StrEnum): # Text-to-image model configurations T2I_MODELS: Dict[str, Dict[str, Any]] = { + "THUDM/CogView4-6B": { + "pipeline_cls": CogView4Pipeline, + "from_pretrained_args": { + "torch_dtype": torch.bfloat16, + "use_safetensors": True + }, + "generate_args": { + "guidance_scale": 3.5, + "num_images_per_prompt": 1, + "num_inference_steps": 50, + "width": 512, + "height": 512 + }, + "use_autocast": False + }, "stabilityai/stable-diffusion-xl-base-1.0": { "pipeline_cls": StableDiffusionXLPipeline, "from_pretrained_args": { @@ -407,13 +425,35 @@ class Modality(StrEnum): } T2V_MODEL_NAMES: List[str] = list(T2V_MODELS.keys()) +# Image-to-video model configurations +I2V_MODELS: Dict[str, Dict[str, Any]] = { + "THUDM/CogVideoX1.5-5B-I2V": { + "pipeline_cls": CogVideoXImageToVideoPipeline, + "from_pretrained_args": { + "use_safetensors": True, + "torch_dtype": torch.bfloat16 + }, + "generate_args": { + "guidance_scale": 2, + "num_videos_per_prompt": 
1, + "num_inference_steps": {"min": 50, "max": 125}, + "num_frames": {"min": 48} + }, + "save_args": {"fps": 8}, + "enable_model_cpu_offload": True, + "vae_enable_slicing": True, + "vae_enable_tiling": True + } +} +I2V_MODEL_NAMES: List[str] = list(I2V_MODELS.keys()) + # Combined model configurations -MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS} +MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS, **I2V_MODELS} MODEL_NAMES: List[str] = list(MODELS.keys()) def get_modality(model_name): - if model_name in T2V_MODEL_NAMES: + if model_name in T2V_MODEL_NAMES + I2V_MODEL_NAMES: return Modality.VIDEO elif model_name in T2I_MODEL_NAMES + I2I_MODEL_NAMES: return Modality.IMAGE @@ -421,7 +461,7 @@ def get_modality(model_name): def get_output_media_type(model_name): if model_name in I2I_MODEL_NAMES: return MediaType.SEMISYNTHETIC - elif model_name in T2I_MODEL_NAMES + T2V_MODEL_NAMES: + elif model_name in T2I_MODEL_NAMES + T2V_MODEL_NAMES + I2V_MODEL_NAMES: return MediaType.SYNTHETIC def get_task(model_name): @@ -431,15 +471,17 @@ def get_task(model_name): return 't2i' elif model_name in I2I_MODEL_NAMES: return 'i2i' + elif model_name in I2V_MODEL_NAMES: + return 'i2v' def select_random_model(task: Optional[str] = None) -> str: """ - Select a random text-to-image or text-to-video model based on the specified + Select a random text-to-image, text-to-video, image-to-image, or image-to-video model based on the specified modality. Args: - modality: The type of model to select ('t2v', 't2i', 'i2i', or 'random'). + modality: The type of model to select ('t2v', 't2i', 'i2i', 'i2v', or 'random'). If None or 'random', randomly chooses between the valid options Returns: @@ -449,7 +491,7 @@ def select_random_model(task: Optional[str] = None) -> str: NotImplementedError: If the specified modality is not supported. 
""" if task is None or task == 'random': - task = np.random.choice(['t2i', 'i2i', 't2v']) + task = np.random.choice(['t2i', 'i2i', 't2v', 'i2v']) if task == 't2i': return np.random.choice(T2I_MODEL_NAMES) @@ -457,5 +499,7 @@ def select_random_model(task: Optional[str] = None) -> str: return np.random.choice(T2V_MODEL_NAMES) elif task == 'i2i': return np.random.choice(I2I_MODEL_NAMES) + elif task == 'i2v': + return np.random.choice(I2V_MODEL_NAMES) else: raise NotImplementedError(f"Unsupported task: {task}") \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index eb68fdf4..d5ec9659 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ scikit-learn==1.5.2 # Deep learning tools transformers==4.48.0 -diffusers==0.32.2 +git+https://github.com/huggingface/diffusers.git@506f39af3a7b533209cc96f1732fff347070bdbd accelerate==1.2.0 bitsandbytes==0.45.0 sentencepiece==0.2.0 From a8e6b3992bd1e4879f1b61b1bb36a3cf4e20b304 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Thu, 10 Apr 2025 12:20:46 +0000 Subject: [PATCH 03/15] Updated name of mugshots dataset --- bitmind/validator/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index f9aaff33..c3e4df76 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -117,7 +117,7 @@ class Modality(StrEnum): {"path": "bitmind/caltech-256"}, {"path": "bitmind/caltech-101"}, {"path": "bitmind/dtd"}, - {"path": "bitmind/idoc-mugshots"} + {"path": "bitmind/idoc-mugshots-images"} ], "semisynthetic": [ {"path": "bitmind/face-swap"} From 7be293c78e98b20ad16f65fda4d95a439b5c29d7 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Thu, 10 Apr 2025 12:25:09 +0000 Subject: [PATCH 04/15] Version bump --- bitmind/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitmind/__init__.py b/bitmind/__init__.py index 09fc5e90..b0552aa1 100644 --- a/bitmind/__init__.py +++ b/bitmind/__init__.py @@ -18,7 +18,7 @@ # DEALINGS IN THE SOFTWARE. 
-__version__ = "2.2.8" +__version__ = "2.2.9" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) From 839ea1393fe1f2e6eb8fa2e6114b3b0a442ca17c Mon Sep 17 00:00:00 2001 From: benliang99 Date: Fri, 11 Apr 2025 19:29:59 +0000 Subject: [PATCH 05/15] Adjusted i2v video dimensions --- bitmind/validator/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index c3e4df76..2f4674bf 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -437,7 +437,9 @@ class Modality(StrEnum): "guidance_scale": 2, "num_videos_per_prompt": 1, "num_inference_steps": {"min": 50, "max": 125}, - "num_frames": {"min": 48} + "num_frames": {"min": 48}, + "height": 768, + "width": 768, }, "save_args": {"fps": 8}, "enable_model_cpu_offload": True, From 87c8264c7a6060f0963966e89e51d4091510b973 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Fri, 11 Apr 2025 20:10:48 +0000 Subject: [PATCH 06/15] Adjusted num_frames to match CogVideoXImageToVideo source code default --- bitmind/validator/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 2f4674bf..9dd9c8ba 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -437,7 +437,7 @@ class Modality(StrEnum): "guidance_scale": 2, "num_videos_per_prompt": 1, "num_inference_steps": {"min": 50, "max": 125}, - "num_frames": {"min": 48}, + "num_frames": 49, "height": 768, "width": 768, }, From 85cbddc9f32bc839cc2e24e7af4ef1ea3accd5a8 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Fri, 11 Apr 2025 20:53:54 +0000 Subject: [PATCH 07/15] refactor(prompt): simplify video prompt enhancement rules - Reduce complexity of motion instructions - Add logic to detect and preserve existing motion - Improve handling of static vs dynamic subjects --- .../prompt_generator.py | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/bitmind/synthetic_data_generation/prompt_generator.py b/bitmind/synthetic_data_generation/prompt_generator.py index 980df06d..cd7823e6 100644 --- a/bitmind/synthetic_data_generation/prompt_generator.py +++ b/bitmind/synthetic_data_generation/prompt_generator.py @@ -239,36 +239,28 @@ def enhance(self, description: str, max_new_tokens: int = 80) -> str: """ Enhance a static image description to make it suitable for video generation by adding dynamic elements and motion. - + Args: description: The static image description to enhance. max_new_tokens: Maximum number of new tokens to generate in the enhanced text. - + Returns: - An enhanced description suitable for video generation, or the original - description if enhancement fails. + An enhanced description suitable for video generation. """ messages = [ { "role": "system", "content": ( - "[INST]You are an expert at converting static image descriptions " - "into dynamic video prompts. Enhance the given description by " - "adding natural motion and temporal elements while preserving the " - "core scene. Follow these rules:\n" - "1. Maintain the essential elements of the original description\n" - "2. Add EXACTLY ONE camera motion (e.g., 'camera slowly zooming in', " - "'camera gently panning left to right', 'camera smoothly circling')\n" - "3. 
Add EXACTLY ONE ambient motion based on the scene type:\n" - " - For portraits: Add subtle facial expressions or micro-movements\n" - " - For people: Add natural body language or gestures\n" - " - For landscapes: Add environmental motion (wind, clouds, water)\n" - " - For urban scenes: Add atmospheric elements (light, shadows)\n" - " - For objects: Add gentle environmental interaction\n" - "4. Keep the total description under 75 words\n" - "5. Format: '{original description}, {camera motion}, {ambient motion}'\n" - "6. Use smooth, gradual terms like 'gently', 'slowly', 'subtly'\n" - "7. NEVER add new subjects or major scene changes\n" + "[INST]You are an expert at converting image descriptions into video prompts. " + "Analyze the existing motion in the scene and enhance it naturally:\n" + "1. If motion exists in the image (falling, throwing, running, etc.):\n" + " - Maintain and emphasize that existing motion\n" + " - Add smooth continuation of the movement\n" + "2. If the subject is static (sitting, standing, placed):\n" + " - Keep it stable\n" + " - Add minimal environmental motion if appropriate\n" + "3. Add ONE subtle camera motion that complements the scene\n" + "4. Keep the description concise and natural\n" "Only respond with the enhanced description.[/INST]" ) }, From 51b33cff2399e8c7d28903dca6a992a35e5c89f3 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Sat, 12 Apr 2025 13:07:09 -0700 Subject: [PATCH 08/15] missing import --- bitmind/synthetic_data_generation/synthetic_data_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 0ebc89b9..809ec89e 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -24,6 +24,7 @@ T2V_MODEL_NAMES, T2I_MODEL_NAMES, I2I_MODEL_NAMES, + I2V_MODEL_NAMES, TARGET_IMAGE_SIZE, select_random_model, get_task, From 2104e71a829f70330b2b3170bdd578de44a48cf7 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Sat, 12 Apr 2025 13:17:38 -0700 Subject: [PATCH 09/15] removing generate_from_prompt --- .../synthetic_data_generator.py | 61 +------------------ 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 809ec89e..11ccf214 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -508,63 +508,4 @@ def clear_gpu(self) -> None: del self.model self.model = None gc.collect() - torch.cuda.empty_cache() - - def generate_from_prompt( - self, - prompt: str, - task: Optional[str] = None, - image: Optional[Image.Image] = None, - generate_at_target_size: bool = False - ) -> Dict[str, Any]: - """Generate synthetic data based on a provided prompt. 
- - Args: - prompt: The text prompt to use for generation - task: Optional task type ('t2i', 't2v', 'i2i', 'i2v') - image: Optional input image for i2i or i2v generation - generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions - - Returns: - Dictionary containing generated data information - """ - bt.logging.info(f"Generating synthetic data from provided prompt: {prompt}") - - # Default to t2i if task is not specified - if task is None: - task = 't2i' - - # If model_name is not specified, select one based on the task - if self.model_name is None and self.use_random_model: - bt.logging.warning(f"No model configured. Using random model.") - if task == 't2i': - model_candidates = T2I_MODEL_NAMES - elif task == 't2v': - model_candidates = T2V_MODEL_NAMES - elif task == 'i2i': - model_candidates = I2I_MODEL_NAMES - elif task == 'i2v': - model_candidates = I2V_MODEL_NAMES - else: - raise ValueError(f"Unsupported task: {task}") - - self.model_name = random.choice(model_candidates) - - # Validate input image for tasks that require it - if task in ['i2i', 'i2v'] and image is None: - raise ValueError(f"Input image is required for {task} generation") - - # Run the generation with the provided prompt - gen_data = self._run_generation( - prompt=prompt, - task=task, - model_name=self.model_name, - image=image, - generate_at_target_size=generate_at_target_size - ) - - # Clean up GPU memory - self.clear_gpu() - - return gen_data - + torch.cuda.empty_cache() \ No newline at end of file From 3dd2b24dedb2d2961bf15a0cb9ccbb07bbee6894 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Mon, 14 Apr 2025 03:51:04 +0000 Subject: [PATCH 10/15] add i2v models to batch generation interleaving --- .../synthetic_data_generator.py | 68 ++++++++++++++++++- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 11ccf214..e2122376 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -169,9 +169,12 @@ def batch_generate(self, batch_size: int = 5) -> None: i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) + i2v_model_names = random.sample(I2V_MODEL_NAMES, len(I2V_MODEL_NAMES)) + model_names = [ - m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) - for m in triple if m is not None + m for quad in zip_longest(t2v_model_names, t2i_model_names, + i2i_model_names, i2v_model_names) + for m in quad if m is not None ] # Generate for each model/prompt combination @@ -508,4 +511,63 @@ def clear_gpu(self) -> None: del self.model self.model = None gc.collect() - torch.cuda.empty_cache() \ No newline at end of file + torch.cuda.empty_cache() + + def generate_from_prompt( + self, + prompt: str, + task: Optional[str] = None, + image: Optional[Image.Image] = None, + generate_at_target_size: bool = False + ) -> Dict[str, Any]: + """Generate synthetic data based on a provided prompt. 
+ + Args: + prompt: The text prompt to use for generation + task: Optional task type ('t2i', 't2v', 'i2i', 'i2v') + image: Optional input image for i2i or i2v generation + generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions + + Returns: + Dictionary containing generated data information + """ + bt.logging.info(f"Generating synthetic data from provided prompt: {prompt}") + + # Default to t2i if task is not specified + if task is None: + task = 't2i' + + # If model_name is not specified, select one based on the task + if self.model_name is None and self.use_random_model: + bt.logging.warning(f"No model configured. Using random model.") + if task == 't2i': + model_candidates = T2I_MODEL_NAMES + elif task == 't2v': + model_candidates = T2V_MODEL_NAMES + elif task == 'i2i': + model_candidates = I2I_MODEL_NAMES + elif task == 'i2v': + model_candidates = I2V_MODEL_NAMES + else: + raise ValueError(f"Unsupported task: {task}") + + self.model_name = random.choice(model_candidates) + + # Validate input image for tasks that require it + if task in ['i2i', 'i2v'] and image is None: + raise ValueError(f"Input image is required for {task} generation") + + # Run the generation with the provided prompt + gen_data = self._run_generation( + prompt=prompt, + task=task, + model_name=self.model_name, + image=image, + generate_at_target_size=generate_at_target_size + ) + + # Clean up GPU memory + self.clear_gpu() + + return gen_data + From ddab61c09f0b679f655c7e0e2dec146f4fc920bb Mon Sep 17 00:00:00 2001 From: benliang99 Date: Mon, 14 Apr 2025 04:03:25 +0000 Subject: [PATCH 11/15] Removed I2V model --- bitmind/validator/challenge.py | 4 ++-- bitmind/validator/config.py | 22 +--------------------- 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/bitmind/validator/challenge.py b/bitmind/validator/challenge.py index b9767cf5..9cca7724 100644 --- a/bitmind/validator/challenge.py +++ b/bitmind/validator/challenge.py @@ -136,7 +136,7 @@ def sample_video_frames(self, video_cache): sample['video'] = sample_A['video'] + sample_B['video'] return sample - + def process_metadata(self, sample) -> bool: """Prepare challenge metadata and media for logging to Weights & Biases """ self.metadata = { @@ -179,4 +179,4 @@ def create_wandb_video(video_frames, fps): except Exception as e: bt.logging.error(e) bt.logging.error(f"{self.modality} is truncated or corrupt. 
Challenge skipped.") - return False + return False \ No newline at end of file diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 9dd9c8ba..60f3b924 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -426,27 +426,7 @@ class Modality(StrEnum): T2V_MODEL_NAMES: List[str] = list(T2V_MODELS.keys()) # Image-to-video model configurations -I2V_MODELS: Dict[str, Dict[str, Any]] = { - "THUDM/CogVideoX1.5-5B-I2V": { - "pipeline_cls": CogVideoXImageToVideoPipeline, - "from_pretrained_args": { - "use_safetensors": True, - "torch_dtype": torch.bfloat16 - }, - "generate_args": { - "guidance_scale": 2, - "num_videos_per_prompt": 1, - "num_inference_steps": {"min": 50, "max": 125}, - "num_frames": 49, - "height": 768, - "width": 768, - }, - "save_args": {"fps": 8}, - "enable_model_cpu_offload": True, - "vae_enable_slicing": True, - "vae_enable_tiling": True - } -} +I2V_MODELS: Dict[str, Dict[str, Any]] = {} I2V_MODEL_NAMES: List[str] = list(I2V_MODELS.keys()) # Combined model configurations From 02bcad0a180afe482a6ba24a017a3d6507691eff Mon Sep 17 00:00:00 2001 From: benliang99 Date: Mon, 14 Apr 2025 04:41:14 +0000 Subject: [PATCH 12/15] fix: remove i2v from random task selection and add safety check --- bitmind/validator/config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 60f3b924..4afd2d75 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -433,7 +433,6 @@ class Modality(StrEnum): MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS, **I2V_MODELS} MODEL_NAMES: List[str] = list(MODELS.keys()) - def get_modality(model_name): if model_name in T2V_MODEL_NAMES + I2V_MODEL_NAMES: return Modality.VIDEO @@ -473,7 +472,7 @@ def select_random_model(task: Optional[str] = None) -> str: NotImplementedError: If the specified modality is not supported. 
""" if task is None or task == 'random': - task = np.random.choice(['t2i', 'i2i', 't2v', 'i2v']) + task = np.random.choice(['t2i', 'i2i', 't2v']) if task == 't2i': return np.random.choice(T2I_MODEL_NAMES) @@ -482,6 +481,8 @@ def select_random_model(task: Optional[str] = None) -> str: elif task == 'i2i': return np.random.choice(I2I_MODEL_NAMES) elif task == 'i2v': + if not I2V_MODEL_NAMES: + raise NotImplementedError("I2V models are not currently configured") return np.random.choice(I2V_MODEL_NAMES) else: raise NotImplementedError(f"Unsupported task: {task}") \ No newline at end of file From d06913870d1697922b0b100ccdba9b911fbffed8 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Mon, 14 Apr 2025 07:02:14 +0000 Subject: [PATCH 13/15] Removed deprecated inpainting model and bumped diffusers version --- bitmind/validator/config.py | 6 ------ requirements.txt | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 4afd2d75..fc56eefb 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -322,12 +322,6 @@ class Modality(StrEnum): "scheduler": { "cls": DEISMultistepScheduler } - }, - "stable-diffusion-v1-5/stable-diffusion-inpainting": { - "pipeline_cls": StableDiffusionInpaintPipeline, - "generate_args": { - "num_inference_steps": {"min": 40, "max": 60}, - } } } I2I_MODEL_NAMES: List[str] = list(I2I_MODELS.keys()) diff --git a/requirements.txt b/requirements.txt index d5ec9659..947e04ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ scikit-learn==1.5.2 # Deep learning tools transformers==4.48.0 -git+https://github.com/huggingface/diffusers.git@506f39af3a7b533209cc96f1732fff347070bdbd +diffusers==0.33.1 accelerate==1.2.0 bitsandbytes==0.45.0 sentencepiece==0.2.0 From 748c07982d6cdf0d8d45cd79adb1946623f7e9eb Mon Sep 17 00:00:00 2001 From: benliang99 Date: Mon, 14 Apr 2025 08:57:53 +0000 Subject: [PATCH 14/15] Revert stable-diffusion-v1-5/stable-diffusion-inpainting model removal --- bitmind/validator/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index fc56eefb..4afd2d75 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -322,6 +322,12 @@ class Modality(StrEnum): "scheduler": { "cls": DEISMultistepScheduler } + }, + "stable-diffusion-v1-5/stable-diffusion-inpainting": { + "pipeline_cls": StableDiffusionInpaintPipeline, + "generate_args": { + "num_inference_steps": {"min": 40, "max": 60}, + } } } I2I_MODEL_NAMES: List[str] = list(I2I_MODELS.keys()) From 0ed84b27b102cebe8e74e18a2cc5cdb451605027 Mon Sep 17 00:00:00 2001 From: benliang99 Date: Mon, 14 Apr 2025 17:42:12 +0000 Subject: [PATCH 15/15] Re-removed deprecated inpainting model, fixed model verification debug statements --- bitmind/validator/config.py | 6 ------ bitmind/validator/verify_models.py | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 4afd2d75..fc56eefb 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -322,12 +322,6 @@ class Modality(StrEnum): "scheduler": { "cls": DEISMultistepScheduler } - }, - "stable-diffusion-v1-5/stable-diffusion-inpainting": { - "pipeline_cls": StableDiffusionInpaintPipeline, - "generate_args": { - "num_inference_steps": {"min": 40, "max": 60}, - } } } I2I_MODEL_NAMES: List[str] = list(I2I_MODELS.keys()) diff --git a/bitmind/validator/verify_models.py 
b/bitmind/validator/verify_models.py index ff79c295..ff80c9e0 100644 --- a/bitmind/validator/verify_models.py +++ b/bitmind/validator/verify_models.py @@ -23,10 +23,10 @@ def is_model_cached(model_name): # Check if the model directory exists if os.path.isdir(model_path): - bt.logging.info(f"{model_name} is in HF cache. Skipping....") + print(f"{model_name} is in HF cache. Skipping....") return True else: - bt.logging.info(f"{model_name} is not cached. Downloading....") + print(f"{model_name} is not cached. Downloading....") return False
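Editor's note: the final hunk above only touches the logging lines of `is_model_cached`. For readers who want the full picture, below is a minimal sketch of how such a cache check can be written end to end. It is a sketch, not the code in this repository: the `HF_HOME` override and the `models--<org>--<name>` directory naming are assumptions based on the standard Hugging Face Hub cache layout.

```python
import os

def is_model_cached(model_name: str) -> bool:
    """Best-effort check for a locally cached Hugging Face model.

    Assumes the standard Hub cache layout <cache>/hub/models--<org>--<name>,
    where <cache> defaults to ~/.cache/huggingface unless HF_HOME is set.
    """
    cache_root = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
    model_path = os.path.join(cache_root, "hub", "models--" + model_name.replace("/", "--"))

    # Mirror the patched behavior: plain prints so the check works even
    # before bittensor logging has been configured.
    if os.path.isdir(model_path):
        print(f"{model_name} is in HF cache. Skipping....")
        return True
    print(f"{model_name} is not cached. Downloading....")
    return False
```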