From e3522b2de80a9ca8cf163714ea4defda278e855f Mon Sep 17 00:00:00 2001
From: Dylan Uys
Date: Mon, 28 Apr 2025 03:23:19 -0700
Subject: [PATCH 1/3] handling case where miner only predicts 2 dimensional
 vector

---
 bitmind/protocol.py | 123 ++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 67 deletions(-)

diff --git a/bitmind/protocol.py b/bitmind/protocol.py
index e75296e1..bdcf7413 100644
--- a/bitmind/protocol.py
+++ b/bitmind/protocol.py
@@ -31,6 +31,7 @@
 from bitmind.validator.config import TARGET_IMAGE_SIZE
 from bitmind.utils.image_transforms import get_base_transforms
 
+
 base_transforms = get_base_transforms(TARGET_IMAGE_SIZE)
 
 
@@ -49,6 +50,7 @@
 # predictions = dendrite.query( ImageSynapse( images = b64_images ) )
 # assert len(predictions) == len(b64_images)
 
+
 def prepare_synapse(input_data, modality):
     if isinstance(input_data, torch.Tensor):
         input_data = transforms.ToPILImage()(input_data.cpu().detach())
@@ -56,9 +58,9 @@ def prepare_synapse(input_data, modality):
         for i, img in enumerate(input_data):
             input_data[i] = transforms.ToPILImage()(img.cpu().detach())
 
-    if modality == 'image':
+    if modality == "image":
         return prepare_image_synapse(input_data)
-    elif modality == 'video':
+    elif modality == "video":
         return prepare_video_synapse(input_data)
     else:
         raise NotImplementedError(f"Unsupported modality: {modality}")
@@ -80,32 +82,37 @@ def prepare_image_synapse(image: Image):
     return ImageSynapse(image=b64_encoded_image)
 
 
-class ImageSynapse(bt.Synapse):
+def prepare_video_synapse(frames: List[Image.Image]):
     """
-    This protocol helps in handling image/prediction request and response communication between
-    the miner and the validator.
+    Prepares video frames for use with a VideoSynapse object.
 
-    Attributes:
-    - image: a bas64 encoded images
-    - prediction: a float indicating the probabilty that the image is AI generated/modified.
-        >.5 is considered generated/modified, <= 0.5 is considered real.
+    Args:
+        frames (List[Image.Image]): The list of video frames to be prepared.
+
+    Returns:
+        VideoSynapse: An instance of VideoSynapse containing the encoded frames and a default prediction value.
     """
+    frame_bytes = []
+    for frame in frames:
+        buffer = BytesIO()
+        frame.save(buffer, format="JPEG")
+        frame_bytes.append(buffer.getvalue())
 
-    testnet_label: int = -1 # for easier miner eval on testnet
+    combined_bytes = b"".join(frame_bytes)
+    compressed_data = zlib.compress(combined_bytes)
+    encoded_data = base64.b85encode(compressed_data).decode("utf-8")
+    return VideoSynapse(video=encoded_data)
 
-    # Required request input, filled by sending dendrite caller.
-    image: str = pydantic.Field(
-        title="Image",
-        description="A base64 encoded image",
-        default="",
-        frozen=False
-    )
+
+class MediaSynapse(bt.Synapse):
+
+    testnet_label: int = -1  # for miners to monitor their performance on testnet
 
     prediction: Union[float, List[float]] = pydantic.Field(
         title="Prediction",
         description="Probability vector for [real, synthetic, semi-synthetic] classes.",
-        default=[-1., -1., -1.],
-        frozen=False
+        default=[-1.0, -1.0, -1.0],
+        frozen=False,
     )
 
     def deserialize(self) -> np.ndarray:
@@ -118,73 +125,49 @@ def deserialize(self) -> np.ndarray:
         p = self.prediction
         if isinstance(p, float):
             if p == -1:
-                return np.array([-1., -1., -1.])
+                return np.array([-1.0, -1.0, -1.0])
             else:
-                return np.array([1-p, p, 0.])
+                return np.array([1 - p, p, 0.0])
         elif isinstance(p, list):
+            if len(p) == 2:
+                p += [0.0]  # assume 2-dim responses are [real, fake]
             return np.array(p)
         else:
             raise ValueError(f"Unsupported prediction type: {type(p)}")
 
 
-def prepare_video_synapse(frames: List[Image.Image]):
+class ImageSynapse(MediaSynapse):
     """
+    This protocol helps in handling image/prediction request and response communication between
+    the miner and the validator.
+
+    Attributes:
+    - image: a base64 encoded image
+    - prediction: a probability vector for the [real, synthetic, semi-synthetic] classes.
+        A single float probability is also accepted for backwards compatibility.
     """
-    frame_bytes = []
-    for frame in frames:
-        buffer = BytesIO()
-        frame.save(buffer, format="JPEG")
-        frame_bytes.append(buffer.getvalue())
 
-    combined_bytes = b''.join(frame_bytes)
-    compressed_data = zlib.compress(combined_bytes)
-    encoded_data = base64.b85encode(compressed_data).decode('utf-8')
-    return VideoSynapse(video=encoded_data)
 
+    image: str = pydantic.Field(
+        title="Image", description="A base64 encoded image", default="", frozen=False
+    )
+
 
-class VideoSynapse(bt.Synapse):
+class VideoSynapse(MediaSynapse):
     """
-    Naive initial VideoSynapse
+    Naive initial VideoSynapse
     Better option would be to modify the Dendrite interface to allow multipart/form-data here:
     https://github.com/opentensor/bittensor/blob/master/bittensor/core/dendrite.py#L533
     Another higher lift option would be to look into Epistula or Fiber
     """
-    testnet_label: int = -1 # for easier miner eval on testnet
-
     # Required request input, filled by sending dendrite caller.
     video: str = pydantic.Field(
         title="Video",
         description="A wildly inefficient means of sending video data",
         default="",
-        frozen=False
-    )
-
-    # Optional request output, filled by receiving axon.
-    prediction: Union[float, List[float]] = pydantic.Field(
-        title="Prediction",
-        description="Probability vector for [real, synthetic, semi-synthetic] classes.",
-        default=[-1., -1., -1.],
-        frozen=False
+        frozen=False,
     )
 
-    def deserialize(self) -> np.ndarray:
-        """
-        Deserialize the output. Backwards compatible with binary float outputs.
-
-        Returns:
-        - float: The deserialized miner prediction probabilities
-        """
-        p = self.prediction
-        if isinstance(p, float):
-            if p == -1:
-                return np.array([-1., -1., -1.])
-            else:
-                return np.array([1-p, p, 0.])
-        elif isinstance(p, list):
-            return np.array(p)
-        else:
-            raise ValueError(f"Unsupported prediction type: {type(p)}")
 
 
 def decode_video_synapse(synapse: VideoSynapse) -> List[torch.Tensor]:
     """
@@ -196,7 +179,7 @@ def decode_video_synapse(synapse: VideoSynapse) -> List[torch.Tensor]:
     Returns:
         List of torch tensors, each representing a frame from the video
     """
-    compressed_data = base64.b85decode(synapse.video.encode('utf-8'))
+    compressed_data = base64.b85decode(synapse.video.encode("utf-8"))
     combined_bytes = zlib.decompress(compressed_data)
 
     # Split the combined bytes into individual JPEG files
@@ -208,7 +191,10 @@ def decode_video_synapse(synapse: VideoSynapse) -> List[torch.Tensor]:
     while current_pos < data_length:
         # Find start of JPEG (FF D8)
         while current_pos < data_length - 1:
-            if combined_bytes[current_pos] == 0xFF and combined_bytes[current_pos + 1] == 0xD8:
+            if (
+                combined_bytes[current_pos] == 0xFF
+                and combined_bytes[current_pos + 1] == 0xD8
+            ):
                 break
             current_pos += 1
 
@@ -219,7 +205,10 @@ def decode_video_synapse(synapse: VideoSynapse) -> List[torch.Tensor]:
 
         # Find end of JPEG (FF D9)
         while current_pos < data_length - 1:
-            if combined_bytes[current_pos] == 0xFF and combined_bytes[current_pos + 1] == 0xD9:
+            if (
+                combined_bytes[current_pos] == 0xFF
+                and combined_bytes[current_pos + 1] == 0xD9
+            ):
                 current_pos += 2
                 break
             current_pos += 1
@@ -234,10 +223,10 @@ def decode_video_synapse(synapse: VideoSynapse) -> List[torch.Tensor]:
             print(f"Error processing frame: {e}")
             continue
 
-    bt.logging.info('transforming video inputs')
+    bt.logging.info("transforming video inputs")
     frames = base_transforms(frames)
     frames = torch.stack(frames, dim=0)
     frames = frames.unsqueeze(0)
-    print(f'decoded video into tensor with shape {frames.shape}')
+    print(f"decoded video into tensor with shape {frames.shape}")
 
     return frames

From 5923e609c75bbe84a256fd34019287ad07d33d97 Mon Sep 17 00:00:00 2001
From: Dylan Uys
Date: Mon, 28 Apr 2025 10:37:59 +0000
Subject: [PATCH 2/3] bump version

---
 bitmind/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitmind/__init__.py b/bitmind/__init__.py
index b0552aa1..1c4a95f8 100644
--- a/bitmind/__init__.py
+++ b/bitmind/__init__.py
@@ -18,7 +18,7 @@
 # DEALINGS IN THE SOFTWARE.
 
 
-__version__ = "2.2.9"
+__version__ = "2.2.10"
 version_split = __version__.split(".")
 __spec_version__ = (
     (1000 * int(version_split[0]))

From 4f16d7228bbef5867fb7c93eda4273eb0abd7506 Mon Sep 17 00:00:00 2001
From: Dylan Uys
Date: Mon, 28 Apr 2025 03:53:18 -0700
Subject: [PATCH 3/3] comment update

---
 bitmind/protocol.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/bitmind/protocol.py b/bitmind/protocol.py
index bdcf7413..fc05d43b 100644
--- a/bitmind/protocol.py
+++ b/bitmind/protocol.py
@@ -154,10 +154,7 @@ class ImageSynapse(MediaSynapse):
 
 class VideoSynapse(MediaSynapse):
     """
-    Naive initial VideoSynapse
-    Better option would be to modify the Dendrite interface to allow multipart/form-data here:
-    https://github.com/opentensor/bittensor/blob/master/bittensor/core/dendrite.py#L533
-    Another higher lift option would be to look into Epistula or Fiber
+    Naive initial VideoSynapse (Epistula version coming soon I promise)
     """
 
     # Required request input, filled by sending dendrite caller.
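
Review note on PATCH 1/3: the three prediction shapes a miner can now return all funnel into the same three-class vector. The sketch below mirrors the new `MediaSynapse.deserialize` logic as a standalone function so the mapping can be checked without a bittensor install; the bare `deserialize` helper and the sample values are illustrative, not code from the patch:

```python
import numpy as np


def deserialize(p):
    """Mirrors the deserialize logic of MediaSynapse from PATCH 1/3."""
    if isinstance(p, float):
        if p == -1:
            return np.array([-1.0, -1.0, -1.0])  # default sentinel: no response
        return np.array([1 - p, p, 0.0])  # legacy binary float output
    elif isinstance(p, list):
        if len(p) == 2:
            p = p + [0.0]  # assume 2-dim responses are [real, fake]
        return np.array(p)
    raise ValueError(f"Unsupported prediction type: {type(p)}")


print(deserialize(0.8))              # legacy float -> [0.2 0.8 0. ]
print(deserialize([0.1, 0.9]))       # 2-dim miner  -> [0.1 0.9 0. ]
print(deserialize([0.1, 0.7, 0.2]))  # 3-dim miner  -> [0.1 0.7 0.2]
```

One detail worth flagging: the patch pads with `p += [0.0]`, which mutates `self.prediction` in place; the copy (`p = p + [0.0]`) used here avoids that side effect, though the resulting array is the same.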
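For the video path, here is a round-trip sketch of the wire format shared by `prepare_video_synapse` and `decode_video_synapse` (concatenated JPEGs -> zlib -> base85). The frame count, frame sizes, and the `bytes.find`-based marker scan are stand-ins for the patch's byte-by-byte loop, not code from the repo:

```python
import base64
import zlib
from io import BytesIO

from PIL import Image

# Encode: same wire format as prepare_video_synapse.
frames = [Image.new("RGB", (64, 64), color=(40 * i, 0, 0)) for i in range(4)]
frame_bytes = []
for frame in frames:
    buf = BytesIO()
    frame.save(buf, format="JPEG")
    frame_bytes.append(buf.getvalue())
payload = base64.b85encode(zlib.compress(b"".join(frame_bytes))).decode("utf-8")

# Decode: split on JPEG SOI (FF D8) / EOI (FF D9) markers, as decode_video_synapse does.
blob = zlib.decompress(base64.b85decode(payload.encode("utf-8")))
decoded, pos = [], 0
while True:
    start = blob.find(b"\xff\xd8", pos)  # start of next JPEG frame
    if start == -1:
        break
    end = blob.find(b"\xff\xd9", start)  # end of that frame
    if end == -1:
        break
    decoded.append(Image.open(BytesIO(blob[start:end + 2])))
    pos = end + 2

assert len(decoded) == len(frames)
```

The scan relies on the same assumption as the patch: a bare FF D9 only appears at frame boundaries. That holds for frames PIL itself encodes (byte stuffing keeps it out of entropy-coded data) but would break on JPEGs carrying embedded thumbnails.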