Merge pull request #1 from Infinitode/feat/add-spsa-jpeg-defense

Infinitode · web-flow · commit ae957d6fd4df · 2025-09-10T19:08:33.000+02:00
feat: Add SPSA attack and JPEG compression defense
diff --git a/deepdefend/__init__.py b/deepdefend/__init__.py
@@ -1,3 +1,3 @@
 import deepdefend
-from .attacks import fgsm, pgd, bim, cw, deepfool, jsma
-from .defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation, randomized_smoothing, feature_denoising, thermometer_encoding, adversarial_logit_pairing, spatial_smoothing
+from .attacks import fgsm, pgd, bim, cw, deepfool, jsma, spsa
+from .defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation, randomized_smoothing, feature_denoising, thermometer_encoding, adversarial_logit_pairing, spatial_smoothing, jpeg_compression
diff --git a/deepdefend/attacks.py b/deepdefend/attacks.py
@@ -8,6 +8,7 @@
 - `cw(model, x, y, epsilon=0.01, c=1, kappa=0, num_steps=10, alpha=0.01)`: Carlini & Wagner (C&W) attack.
 - `deepfool(model, x, y, num_steps=10)`: DeepFool attack.
 - `jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10)`: Jacobian-based Saliency Map Attack (JSMA).
+- `spsa(model, x, y, epsilon=0.01, num_steps=10, learning_rate=0.01, delta=0.01, spsa_samples=128)`: Simultaneous Perturbation Stochastic Approximation (SPSA) attack.
 """
 
 import numpy as np
@@ -220,4 +221,85 @@ def jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10):
             else:
                 adversarial_example[0, perturbed_pixels[0][i], perturbed_pixels[1][i], perturbed_pixels[2][i]] -= gamma
 
-    return adversarial_example.numpy()
+    return adversarial_example.numpy()
+
+
+def spsa(model, x, y, epsilon=0.01, num_steps=10, learning_rate=0.01, delta=0.01, spsa_samples=128):
+    """
+    Simultaneous Perturbation Stochastic Approximation (SPSA) attack.
+
+    SPSA is a gradient-free attack: it only queries the model's outputs and
+    estimates the loss gradient from random sign perturbations, then takes
+    ADAM steps that increase the loss while keeping every component of the
+    perturbation within [-epsilon, epsilon].
+
+    Parameters:
+        model (tensorflow.keras.Model): The target model to attack.
+        x (numpy.ndarray): The input example to attack. Must have a batch size of 1; values
+            are assumed to lie in [0, 1] because the result is clipped to that range.
+        y (numpy.ndarray): The true label of the input example, given as a single
+            sparse (integer) class index.
+        epsilon (float): The maximum magnitude of the perturbation (default: 0.01).
+        num_steps (int): The number of SPSA iterations (default: 10).
+        learning_rate (float): The learning rate for the ADAM optimizer (default: 0.01).
+        delta (float): The size of the random probes used for SPSA (default: 0.01).
+        spsa_samples (int): The number of samples for SPSA (default: 128). Samples are drawn
+            in +/- pairs, so an odd value is rounded down to the nearest even number.
+
+    Returns:
+        adversarial_example (numpy.ndarray): The perturbed input example.
+
+    Raises:
+        ValueError: If `x` does not have a batch size of 1, if `y` does not hold
+            exactly one label, or if `spsa_samples` is smaller than 2.
+    """
+    if x.shape[0] != 1:
+        raise ValueError("For SPSA, input tensor x must have batch_size of 1.")
+
+    num_pairs = spsa_samples // 2
+    if num_pairs < 1:
+        raise ValueError("For SPSA, spsa_samples must be at least 2.")
+    num_probes = 2 * num_pairs
+
+    tf_dtype = tf.float32
+    # Cast once so that float64 NumPy inputs can be mixed with the float32 probes below.
+    x = tf.cast(x, tf_dtype)
+    sample_shape = list(x.shape[1:])
+
+    label = tf.reshape(tf.cast(y, tf.int64), [-1])
+    if label.shape[0] != 1:
+        raise ValueError("For SPSA, y must contain exactly one sparse (integer) label.")
+    y_tiled = tf.tile(label, [num_probes])
+
+    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
+
+    def loss_fn(data):
+        # Keras' default `from_logits=False` is kept, i.e. the model output is
+        # treated as class probabilities.
+        return tf.keras.losses.sparse_categorical_crossentropy(y_tiled, model(data))
+
+    # The optimizer updates this in place, so it has to be a Variable rather than a constant tensor.
+    perturbation = tf.Variable(tf.zeros_like(x))
+    # Defined up front so that num_steps=0 returns the unmodified input.
+    adversarial_example = x
+
+    for _ in range(num_steps):
+        # Antithetic probes: every random sign pattern is evaluated as +delta and -delta.
+        signs = tf.sign(tf.random.uniform([num_pairs] + sample_shape, minval=-1.0, maxval=1.0, dtype=tf_dtype))
+        delta_x = tf.concat([delta * signs, -delta * signs], axis=0)
+
+        # Probe around the current adversarial point (broadcast over the probe axis), not the clean input.
+        loss_vals = loss_fn(adversarial_example + delta_x)
+        loss_vals = tf.reshape(loss_vals, [num_probes] + [1] * len(sample_shape))
+
+        grad = tf.reduce_mean(loss_vals * delta_x, axis=0, keepdims=True) / delta
+
+        # ADAM minimizes, so feed it the negated estimate to *increase* the loss.
+        optimizer.apply_gradients([(-grad, perturbation)])
+
+        # Projection onto the epsilon bound and the valid input range.
+        adversarial_example = x + tf.clip_by_value(perturbation, -epsilon, epsilon)
+        adversarial_example = tf.clip_by_value(adversarial_example, 0, 1)
+        perturbation.assign(adversarial_example - x)
+
+    return adversarial_example.numpy()
diff --git a/deepdefend/defenses.py b/deepdefend/defenses.py
@@ -13,6 +13,7 @@
 - `thermometer_encoding(model, num_bins=10)`: Thermometer Encoding defense.
 - `adversarial_logit_pairing(model, paired_model)`: Adversarial Logit Pairing defense.
 - `spatial_smoothing(model, kernel_size=3)`: Spatial Smoothing defense.
+- `jpeg_compression(model, quality=75)`: JPEG Compression defense.
 """
 
 import numpy as np
@@ -118,18 +119,16 @@ def input_transformation(model, transformation_function=None):
     Returns:
         defended_model (tensorflow.keras.Model): The model with input transformation defense.
     """
-    defended_model = tf.keras.models.clone_model(model)
-    defended_model.set_weights(model.get_weights())
-
     def transformed_input(x):
         if transformation_function is not None:
             return transformation_function(x)
         else:
             return x
 
-    defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
-    defended_model.layers[0].input = transformed_input(defended_model.layers[0].input)
-    return defended_model
+    input_layer = tf.keras.Input(shape=model.input_shape[1:])
+    x = transformed_input(input_layer)
+    predictions = model(x)
+    return tf.keras.Model(inputs=input_layer, outputs=predictions)
 
 def defensive_distillation(model, teacher_model, temperature=2):
     """
@@ -170,16 +169,14 @@ def randomized_smoothing(model, noise_level=0.1):
     Returns:
         defended_model (tensorflow.keras.Model): The model with randomized smoothing defense.
     """
-    defended_model = tf.keras.models.clone_model(model)
-    defended_model.set_weights(model.get_weights())
-
     def add_noise(x):
         noise = tf.random.normal(shape=tf.shape(x), mean=0.0, stddev=noise_level, dtype=tf.float32)
         return x + noise
 
-    defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
-    defended_model.layers[0].input = add_noise(defended_model.layers[0].input)
-    return defended_model
+    input_layer = tf.keras.Input(shape=model.input_shape[1:])
+    x = add_noise(input_layer)
+    predictions = model(x)
+    return tf.keras.Model(inputs=input_layer, outputs=predictions)
 
 def feature_denoising(model):
     """
@@ -193,15 +190,13 @@ def feature_denoising(model):
     Returns:
         defended_model (tensorflow.keras.Model): The model with feature denoising defense.
     """
-    defended_model = tf.keras.models.clone_model(model)
-    defended_model.set_weights(model.get_weights())
-
     def denoise(x):
         return tf.image.total_variation(x)
 
-    defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
-    defended_model.layers[0].input = denoise(defended_model.layers[0].input)
-    return defended_model
+    input_layer = tf.keras.Input(shape=model.input_shape[1:])
+    x = denoise(input_layer)
+    predictions = model(x)
+    return tf.keras.Model(inputs=input_layer, outputs=predictions)
 
 def thermometer_encoding(model, num_bins=10):
     """
@@ -217,17 +212,15 @@ def thermometer_encoding(model, num_bins=10):
     Returns:
         defended_model (tensorflow.keras.Model): The model with thermometer encoding defense.
     """
-    defended_model = tf.keras.models.clone_model(model)
-    defended_model.set_weights(model.get_weights())
-
     def encode(x):
         x = tf.clip_by_value(x, 0, 1)
         x = tf.floor(x * num_bins) / num_bins
         return x
 
-    defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
-    defended_model.layers[0].input = encode(defended_model.layers[0].input)
-    return defended_model
+    input_layer = tf.keras.Input(shape=model.input_shape[1:])
+    x = encode(input_layer)
+    predictions = model(x)
+    return tf.keras.Model(inputs=input_layer, outputs=predictions)
 
 def adversarial_logit_pairing(model, paired_model):
     """
@@ -266,12 +259,52 @@ def spatial_smoothing(model, kernel_size=3):
     Returns:
         defended_model (tensorflow.keras.Model): The model with spatial smoothing defense.
     """
-    defended_model = tf.keras.models.clone_model(model)
-    defended_model.set_weights(model.get_weights())
-
     def smooth(x):
         return tf.nn.avg_pool2d(x, ksize=kernel_size, strides=1, padding='SAME')
 
-    defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
-    defended_model.layers[0].input = smooth(defended_model.layers[0].input)
-    return defended_model
+    input_layer = tf.keras.Input(shape=model.input_shape[1:])
+    x = smooth(input_layer)
+    predictions = model(x)
+    return tf.keras.Model(inputs=input_layer, outputs=predictions)
+
+
+def jpeg_compression(model, quality=75):
+    """
+    JPEG Compression defense.
+
+    This defense compresses the input image using JPEG, which can remove high-frequency
+    adversarial perturbations. Every image of a batch is quantized to 8 bits, encoded to
+    JPEG, decoded again and rescaled to [0, 1] before it is passed to `model`.
+
+    Parameters:
+        model (tensorflow.keras.Model): The model to defend. Its input must be a channels-last
+            image batch with 1 (grayscale) or 3 (RGB) channels and pixel values in [0, 1].
+        quality (int): The JPEG compression quality (0-100), default is 75.
+
+    Returns:
+        defended_model (tensorflow.keras.Model): The model with JPEG compression defense.
+
+    Raises:
+        ValueError: If the model input does not have 1 or 3 channels.
+    """
+    image_shape = tuple(model.input_shape[1:])
+    channels = image_shape[-1]
+    if channels not in (1, 3):
+        raise ValueError("JPEG compression requires model inputs with 1 or 3 channels.")
+
+    def compress_image(img):
+        # Clip and round before the uint8 cast; a bare cast truncates and has no defined result out of range.
+        pixels = tf.cast(tf.round(tf.clip_by_value(img, 0.0, 1.0) * 255.0), tf.uint8)
+        decoded = tf.image.decode_jpeg(tf.image.encode_jpeg(pixels, quality=quality), channels=channels)
+        # decode_jpeg loses the static shape, which the wrapped model needs.
+        decoded = tf.ensure_shape(decoded, image_shape)
+        return tf.cast(decoded, tf.float32) / 255.0
+
+    def compress(x):
+        return tf.map_fn(compress_image, x, fn_output_signature=tf.float32)
+
+    input_layer = tf.keras.Input(shape=image_shape)
+    # A Lambda layer keeps the raw TensorFlow ops usable on the symbolic Keras input.
+    x = tf.keras.layers.Lambda(compress, output_shape=image_shape)(input_layer)
+    predictions = model(x)
+    return tf.keras.Model(inputs=input_layer, outputs=predictions)
diff --git a/readme.md b/readme.md
@@ -7,6 +7,10 @@
 
 An open-source Python library for adversarial attacks and defenses in deep learning models, enhancing the security and robustness of AI systems.
 
+## Changes in 0.1.4:
+- Added SPSA (Simultaneous Perturbation Stochastic Approximation) attack.
+- Added JPEG Compression defense.
+
 ## Changes in 0.1.3:
 - 5 new functions in `defenses.py`, including: Randomized Smoothing, Feature Denoising, Thermometer Encoding, Adversarial Logit Pairing (ALP), and Spatial Smoothing.
 
@@ -76,13 +80,16 @@ adversarial_example_deepfool = deepfool(model, x_example, y_example, num_steps=1
 
 # Perform JSMA attack on the example data
 adversarial_example_jsma = jsma(model, x_example, y_example, theta=0.1, gamma=0.1, num_steps=10)
+
+# Perform SPSA attack on the example data
+adversarial_example_spsa = spsa(model, x_example, y_example, epsilon=0.01, num_steps=10)
 ```
 
 ### Adversarial Defenses
 
 ```python
 import tensorflow as tf
-from deepdefend.defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation
+from deepdefend.defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation, jpeg_compression
 
 # Load a pre-trained TensorFlow model
 model = ...
@@ -107,6 +114,9 @@ defended_model_transformation = input_transformation(model, transformation_funct
 
 # Defensive distillation defense
 defended_model_distillation = defensive_distillation(model, teacher_model, temperature=2)
+
+# JPEG compression defense
+defended_model_jpeg = jpeg_compression(model, quality=75)
 ```
 
 ## Contributing