Skip to content

Commit ae957d6

Browse files
authored
Merge pull request #1 from Infinitode/feat/add-spsa-jpeg-defense
feat: Add SPSA attack and JPEG compression defense
2 parents a4f2c89 + bef7271 commit ae957d6

File tree

4 files changed

+114
-34
lines changed

4 files changed

+114
-34
lines changed

deepdefend/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
import deepdefend
2-
from .attacks import fgsm, pgd, bim, cw, deepfool, jsma
3-
from .defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation, randomized_smoothing, feature_denoising, thermometer_encoding, adversarial_logit_pairing, spatial_smoothing
2+
from .attacks import fgsm, pgd, bim, cw, deepfool, jsma, spsa
3+
from .defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation, randomized_smoothing, feature_denoising, thermometer_encoding, adversarial_logit_pairing, spatial_smoothing, jpeg_compression

deepdefend/attacks.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- `cw(model, x, y, epsilon=0.01, c=1, kappa=0, num_steps=10, alpha=0.01)`: Carlini & Wagner (C&W) attack.
99
- `deepfool(model, x, y, num_steps=10)`: DeepFool attack.
1010
- `jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10)`: Jacobian-based Saliency Map Attack (JSMA).
11+
- `spsa(model, x, y, epsilon=0.01, num_steps=10, learning_rate=0.01, delta=0.01, spsa_samples=128)`: Simultaneous Perturbation Stochastic Approximation (SPSA) attack.
1112
"""
1213

1314
import numpy as np
@@ -220,4 +221,59 @@ def jsma(model, x, y, theta=0.1, gamma=0.1, num_steps=10):
220221
else:
221222
adversarial_example[0, perturbed_pixels[0][i], perturbed_pixels[1][i], perturbed_pixels[2][i]] -= gamma
222223

223-
return adversarial_example.numpy()
224+
return adversarial_example.numpy()
225+
226+
227+
def spsa(model, x, y, epsilon=0.01, num_steps=10, learning_rate=0.01, delta=0.01, spsa_samples=128):
    """
    Simultaneous Perturbation Stochastic Approximation (SPSA) attack.

    SPSA is a gradient-free attack: the gradient of the loss with respect to
    the input is estimated from loss values at randomly perturbed copies of
    the input, so only forward passes through the model are needed.

    Parameters:
        model (tensorflow.keras.Model): The target model to attack.
        x (numpy.ndarray): The input example to attack. Must have a leading
            batch dimension of size 1.
        y (numpy.ndarray): The true (sparse, integer) label of the input example.
        epsilon (float): The magnitude of the perturbation (default: 0.01).
        num_steps (int): The number of SPSA iterations (default: 10).
        learning_rate (float): The learning rate for the ADAM optimizer (default: 0.01).
        delta (float): The perturbation size for SPSA (default: 0.01).
        spsa_samples (int): The number of samples for SPSA (default: 128).
            Samples are drawn in antithetic (+/-) pairs, so an odd value is
            rounded down to the nearest even number.

    Returns:
        adversarial_example (numpy.ndarray): The perturbed input example.

    Raises:
        ValueError: If `x` does not have a batch size of 1, if `spsa_samples`
            is smaller than 2, or if `y` does not hold exactly one label.
    """
    x_shape = tuple(np.shape(x))
    if not x_shape or x_shape[0] != 1:
        raise ValueError("For SPSA, input tensor x must have batch_size of 1.")

    # Probes come in +/- pairs; derive every count from the number of pairs
    # so that probes and labels always have the same length.
    num_pairs = spsa_samples // 2
    if num_pairs < 1:
        raise ValueError("For SPSA, spsa_samples must be at least 2.")
    num_samples = 2 * num_pairs

    tf_dtype = tf.float32
    # Work in float32 throughout; mixing a float64 input with float32 probes
    # would otherwise fail on the first addition.
    x = tf.cast(tf.convert_to_tensor(x), tf_dtype)

    labels = tf.reshape(tf.cast(y, tf.int64), [-1])
    if labels.shape[0] != 1:
        raise ValueError("For SPSA, y must contain exactly one sparse label.")
    y_tiled = tf.tile(labels, [num_samples])

    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    def loss_fn(data, label):
        # NOTE(review): `from_logits` is left at its default (False), i.e. the
        # model output is treated as probabilities - confirm against the model.
        predictions = model(data)
        return tf.keras.losses.sparse_categorical_crossentropy(label, predictions)

    # The optimizer can only update (and `.assign` only exists on) a variable.
    perturbation = tf.Variable(tf.zeros_like(x), trainable=True)
    # Defined up front so that num_steps == 0 returns the unmodified input.
    adversarial_example = x

    rank = len(x_shape)
    noise_shape = [num_pairs] + list(x_shape[1:])
    loss_shape = [num_samples] + [1] * (rank - 1)

    for _ in range(num_steps):
        # SPSA gradient approximation around the *current* adversarial point.
        signs = tf.sign(tf.random.uniform(noise_shape, minval=-1.0, maxval=1.0, dtype=tf_dtype))
        delta_x = delta * signs
        delta_x = tf.concat([delta_x, -delta_x], axis=0)

        # (1, ...) + (num_samples, ...) broadcasts over the batch axis.
        probes = (x + perturbation) + delta_x
        loss_vals = tf.reshape(loss_fn(probes, y_tiled), loss_shape)

        # keepdims=True keeps the batch axis so the estimate matches the
        # shape of `perturbation`.
        grad = tf.reduce_mean(loss_vals * delta_x, axis=0, keepdims=True) / delta

        # ADAM update. The optimizer minimises, so feed the negated estimate
        # in order to *increase* the loss on the true label.
        optimizer.apply_gradients([(-grad, perturbation)])

        # Projection onto the epsilon-ball and the valid [0, 1] pixel range.
        adversarial_example = x + tf.clip_by_value(perturbation, -epsilon, epsilon)
        adversarial_example = tf.clip_by_value(adversarial_example, 0.0, 1.0)
        perturbation.assign(adversarial_example - x)

    return adversarial_example.numpy()

deepdefend/defenses.py

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
- `thermometer_encoding(model, num_bins=10)`: Thermometer Encoding defense.
1414
- `adversarial_logit_pairing(model, paired_model)`: Adversarial Logit Pairing defense.
1515
- `spatial_smoothing(model, kernel_size=3)`: Spatial Smoothing defense.
16+
- `jpeg_compression(model, quality=75)`: JPEG Compression defense.
1617
"""
1718

1819
import numpy as np
@@ -118,18 +119,16 @@ def input_transformation(model, transformation_function=None):
118119
Returns:
119120
defended_model (tensorflow.keras.Model): The model with input transformation defense.
120121
"""
121-
defended_model = tf.keras.models.clone_model(model)
122-
defended_model.set_weights(model.get_weights())
123-
124122
def transformed_input(x):
125123
if transformation_function is not None:
126124
return transformation_function(x)
127125
else:
128126
return x
129127

130-
defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
131-
defended_model.layers[0].input = transformed_input(defended_model.layers[0].input)
132-
return defended_model
128+
input_layer = tf.keras.Input(shape=model.input_shape[1:])
129+
x = transformed_input(input_layer)
130+
predictions = model(x)
131+
return tf.keras.Model(inputs=input_layer, outputs=predictions)
133132

134133
def defensive_distillation(model, teacher_model, temperature=2):
135134
"""
@@ -170,16 +169,14 @@ def randomized_smoothing(model, noise_level=0.1):
170169
Returns:
171170
defended_model (tensorflow.keras.Model): The model with randomized smoothing defense.
172171
"""
173-
defended_model = tf.keras.models.clone_model(model)
174-
defended_model.set_weights(model.get_weights())
175-
176172
def add_noise(x):
177173
noise = tf.random.normal(shape=tf.shape(x), mean=0.0, stddev=noise_level, dtype=tf.float32)
178174
return x + noise
179175

180-
defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
181-
defended_model.layers[0].input = add_noise(defended_model.layers[0].input)
182-
return defended_model
176+
input_layer = tf.keras.Input(shape=model.input_shape[1:])
177+
x = add_noise(input_layer)
178+
predictions = model(x)
179+
return tf.keras.Model(inputs=input_layer, outputs=predictions)
183180

184181
def feature_denoising(model):
185182
"""
@@ -193,15 +190,13 @@ def feature_denoising(model):
193190
Returns:
194191
defended_model (tensorflow.keras.Model): The model with feature denoising defense.
195192
"""
196-
defended_model = tf.keras.models.clone_model(model)
197-
defended_model.set_weights(model.get_weights())
198-
199193
def denoise(x):
200194
return tf.image.total_variation(x)
201195

202-
defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
203-
defended_model.layers[0].input = denoise(defended_model.layers[0].input)
204-
return defended_model
196+
input_layer = tf.keras.Input(shape=model.input_shape[1:])
197+
x = denoise(input_layer)
198+
predictions = model(x)
199+
return tf.keras.Model(inputs=input_layer, outputs=predictions)
205200

206201
def thermometer_encoding(model, num_bins=10):
207202
"""
@@ -217,17 +212,15 @@ def thermometer_encoding(model, num_bins=10):
217212
Returns:
218213
defended_model (tensorflow.keras.Model): The model with thermometer encoding defense.
219214
"""
220-
defended_model = tf.keras.models.clone_model(model)
221-
defended_model.set_weights(model.get_weights())
222-
223215
def encode(x):
224216
x = tf.clip_by_value(x, 0, 1)
225217
x = tf.floor(x * num_bins) / num_bins
226218
return x
227219

228-
defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
229-
defended_model.layers[0].input = encode(defended_model.layers[0].input)
230-
return defended_model
220+
input_layer = tf.keras.Input(shape=model.input_shape[1:])
221+
x = encode(input_layer)
222+
predictions = model(x)
223+
return tf.keras.Model(inputs=input_layer, outputs=predictions)
231224

232225
def adversarial_logit_pairing(model, paired_model):
233226
"""
@@ -266,12 +259,33 @@ def spatial_smoothing(model, kernel_size=3):
266259
Returns:
267260
defended_model (tensorflow.keras.Model): The model with spatial smoothing defense.
268261
"""
269-
defended_model = tf.keras.models.clone_model(model)
270-
defended_model.set_weights(model.get_weights())
271-
272262
def smooth(x):
273263
return tf.nn.avg_pool2d(x, ksize=kernel_size, strides=1, padding='SAME')
274264

275-
defended_model.layers[0].input = tf.keras.Input(shape=model.input_shape[1:])
276-
defended_model.layers[0].input = smooth(defended_model.layers[0].input)
277-
return defended_model
265+
input_layer = tf.keras.Input(shape=model.input_shape[1:])
266+
x = smooth(input_layer)
267+
predictions = model(x)
268+
return tf.keras.Model(inputs=input_layer, outputs=predictions)
269+
270+
271+
def jpeg_compression(model, quality=75):
    """
    JPEG Compression defense.

    This defense compresses the input image using JPEG, which can remove high-frequency
    adversarial perturbations. Every image in the batch is encoded to JPEG and decoded
    again before being passed to the wrapped model.

    Parameters:
        model (tensorflow.keras.Model): The model to defend. Inputs are assumed to be
            channels-last images scaled to [0, 1] with 1 or 3 channels.
        quality (int): The JPEG compression quality (0-100), default is 75.

    Returns:
        defended_model (tensorflow.keras.Model): The model with JPEG compression defense.

    Raises:
        ValueError: If `quality` is outside 0-100, or if the model's input does not
            have 1 or 3 channels (the only layouts the JPEG encoder is used with here).
    """
    if not 0 <= quality <= 100:
        raise ValueError("quality must be between 0 and 100.")
    quality = int(quality)

    input_shape = tuple(model.input_shape[1:])
    # Decode with the same channel count the model expects instead of always 3,
    # so grayscale models keep a matching input shape.
    channels = input_shape[-1]
    if channels not in (1, 3):
        raise ValueError("JPEG compression requires inputs with 1 or 3 channels.")

    def compress_image(img):
        # Clip and round before the uint8 cast: out-of-range values would
        # otherwise wrap around and in-range values would be truncated.
        pixels = tf.cast(tf.round(tf.clip_by_value(img, 0.0, 1.0) * 255.0), tf.uint8)
        encoded = tf.image.encode_jpeg(pixels, quality=quality)
        decoded = tf.image.decode_jpeg(encoded, channels=channels)
        return tf.cast(decoded, tf.float32) / 255.0

    def compress(batch):
        # map_fn infers its output dtype from the input, so make it float32.
        batch = tf.cast(batch, tf.float32)
        restored = tf.map_fn(compress_image, batch)
        # decode_jpeg loses the static height/width; restore them so the
        # wrapped model sees the shape it was built for.
        return tf.ensure_shape(restored, (None,) + input_shape)

    input_layer = tf.keras.Input(shape=input_shape)
    # Wrap the raw TF ops in a Lambda layer so they are applied as part of the
    # Keras graph rather than called directly on a symbolic input.
    x = tf.keras.layers.Lambda(compress, output_shape=input_shape)(input_layer)
    predictions = model(x)
    return tf.keras.Model(inputs=input_layer, outputs=predictions)

readme.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
An open-source Python library for adversarial attacks and defenses in deep learning models, enhancing the security and robustness of AI systems.
99

10+
## Changes in 0.1.4:
11+
- Added SPSA (Simultaneous Perturbation Stochastic Approximation) attack.
12+
- Added JPEG Compression defense.
13+
1014
## Changes in 0.1.3:
1115
- 5 new functions in `defenses.py`, including: Randomized Smoothing, Feature Denoising, Thermometer Encoding, Adversarial Logit Pairing (ALP), and Spatial Smoothing.
1216

@@ -76,13 +80,16 @@ adversarial_example_deepfool = deepfool(model, x_example, y_example, num_steps=1
7680

7781
# Perform JSMA attack on the example data
7882
adversarial_example_jsma = jsma(model, x_example, y_example, theta=0.1, gamma=0.1, num_steps=10)
83+
84+
# Perform SPSA attack on the example data
85+
adversarial_example_spsa = spsa(model, x_example, y_example, epsilon=0.01, num_steps=10)
7986
```
8087

8188
### Adversarial Defenses
8289

8390
```python
8491
import tensorflow as tf
85-
from deepdefend.defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation
92+
from deepdefend.defenses import adversarial_training, feature_squeezing, gradient_masking, input_transformation, defensive_distillation, jpeg_compression
8693

8794
# Load a pre-trained TensorFlow model
8895
model = ...
@@ -107,6 +114,9 @@ defended_model_transformation = input_transformation(model, transformation_funct
107114

108115
# Defensive distillation defense
109116
defended_model_distillation = defensive_distillation(model, teacher_model, temperature=2)
117+
118+
# JPEG compression defense
119+
defended_model_jpeg = jpeg_compression(model, quality=75)
110120
```
111121

112122
## Contributing

0 commit comments

Comments
 (0)