- `gradient_masking(model, mask_threshold=0.1)`: Gradient Masking defense.
- `input_transformation(model, transformation_function=None)`: Input Transformation defense.
- `defensive_distillation(model, teacher_model, temperature=2)`: Defensive Distillation defense.
- `randomized_smoothing(model, noise_level=0.1)`: Randomized Smoothing defense.
- `feature_denoising(model)`: Feature Denoising defense.
- `thermometer_encoding(model, num_bins=10)`: Thermometer Encoding defense.
- `adversarial_logit_pairing(model, paired_model)`: Adversarial Logit Pairing defense.
- `spatial_smoothing(model, kernel_size=3)`: Spatial Smoothing defense.
1116"""
1217
1318import numpy as np
@@ -149,4 +154,124 @@ def distilled_loss(y_true, y_pred):
149154 return tf .keras .losses .CategoricalCrossentropy ()(y_true , y_pred ) + temperature ** 2 * tf .keras .losses .CategoricalCrossentropy ()(teacher_predictions , y_pred )
150155
151156 defended_model .compile (optimizer = 'adam' , loss = distilled_loss , metrics = ['accuracy' ])
157+ return defended_model
158+
def randomized_smoothing(model, noise_level=0.1):
    """
    Randomized Smoothing defense.

    Wraps a copy of ``model`` so that zero-mean Gaussian noise is added to every
    input batch before it reaches the network. The noise layer is part of the
    returned model's graph, so it is applied on every forward pass.

    Parameters:
    model (tensorflow.keras.Model): The model to defend. It is cloned and its
        weights are copied; the original is not modified. Assumed to have a
        single input tensor -- TODO confirm against callers.
    noise_level (float): The standard deviation of the Gaussian noise (default: 0.1).

    Returns:
    defended_model (tensorflow.keras.Model): An uncompiled model that maps
        ``input -> input + noise -> cloned model``.

    Raises:
    ValueError: If ``noise_level`` is negative.
    """
    if noise_level < 0:
        raise ValueError(
            f"noise_level must be non-negative, got {noise_level!r}"
        )

    base_model = tf.keras.models.clone_model(model)
    base_model.set_weights(model.get_weights())

    def add_noise(x):
        # Sample noise in the input's own dtype so the addition also works
        # for non-float32 inputs (e.g. float16 / float64).
        noise = tf.random.normal(
            shape=tf.shape(x), mean=0.0, stddev=noise_level, dtype=x.dtype
        )
        return x + noise

    # A layer's ``input`` attribute is read-only, so the transformation is
    # wired in by building a new functional model around the clone.
    inputs = tf.keras.Input(shape=model.input_shape[1:])
    noisy_inputs = tf.keras.layers.Lambda(add_noise, name="gaussian_input_noise")(inputs)
    outputs = base_model(noisy_inputs)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
183+
def feature_denoising(model):
    """
    Feature Denoising defense.

    Wraps a copy of ``model`` so that every input image is low-pass filtered
    with a fixed 3x3 binomial (Gaussian-like) kernel before it reaches the
    network. The filter is applied to each channel independently and uses
    symmetric border padding, so the spatial size is unchanged.

    Parameters:
    model (tensorflow.keras.Model): The model to defend. It is cloned and its
        weights are copied; the original is not modified. Must take a single
        4-D image input; the layout is assumed to be channels-last
        (batch, height, width, channels) -- TODO confirm against callers.

    Returns:
    defended_model (tensorflow.keras.Model): An uncompiled model that maps
        ``input -> blurred input -> cloned model``.

    Raises:
    ValueError: If the model input is not a single 4-D tensor with a known
        channel count.
    """
    input_shape = model.input_shape
    if isinstance(input_shape, list) or len(input_shape) != 4:
        raise ValueError(
            "feature_denoising expects a model with a single 4-D image input, "
            f"got input_shape={input_shape!r}"
        )
    channels = input_shape[-1]
    if channels is None:
        raise ValueError("feature_denoising needs a statically known channel count")

    base_model = tf.keras.models.clone_model(model)
    base_model.set_weights(model.get_weights())

    def denoise(x):
        # NOTE: tf.image.total_variation is a per-image scalar measure, not a
        # filter, so it cannot be used to produce a denoised image.
        taps = tf.constant([1.0, 2.0, 1.0], dtype=x.dtype) / 4.0
        kernel = tf.tensordot(taps, taps, axes=0)  # (3, 3), entries sum to 1
        # depthwise_conv2d wants (height, width, in_channels, multiplier).
        kernel = tf.tile(kernel[:, :, tf.newaxis, tf.newaxis], [1, 1, channels, 1])
        # Symmetric padding avoids the darkened borders zero padding would give.
        padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="SYMMETRIC")
        return tf.nn.depthwise_conv2d(
            padded, kernel, strides=[1, 1, 1, 1], padding="VALID"
        )

    # A layer's ``input`` attribute is read-only, so the transformation is
    # wired in by building a new functional model around the clone.
    inputs = tf.keras.Input(shape=input_shape[1:])
    denoised_inputs = tf.keras.layers.Lambda(denoise, name="input_denoising")(inputs)
    outputs = base_model(denoised_inputs)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
205+
def thermometer_encoding(model, num_bins=10):
    """
    Thermometer Encoding defense.

    Wraps a copy of ``model`` so that every input value is clipped to [0, 1]
    and snapped down to a multiple of ``1 / num_bins`` before it reaches the
    network. The input keeps its shape: this is a quantisation of each
    feature, not an expansion into one channel per threshold.

    Parameters:
    model (tensorflow.keras.Model): The model to defend. It is cloned and its
        weights are copied; the original is not modified. Assumed to have a
        single floating-point input tensor -- TODO confirm against callers.
    num_bins (int): The number of bins for encoding (default: 10).

    Returns:
    defended_model (tensorflow.keras.Model): An uncompiled model that maps
        ``input -> quantised input -> cloned model``.

    Raises:
    ValueError: If ``num_bins`` is smaller than 1.
    """
    if num_bins < 1:
        raise ValueError(f"num_bins must be at least 1, got {num_bins!r}")

    base_model = tf.keras.models.clone_model(model)
    base_model.set_weights(model.get_weights())

    def encode(x):
        x = tf.clip_by_value(x, 0, 1)
        # floor() maps each value to the lower edge of its bin; an input of
        # exactly 1.0 stays at 1.0.
        return tf.floor(x * num_bins) / num_bins

    # A layer's ``input`` attribute is read-only, so the transformation is
    # wired in by building a new functional model around the clone.
    inputs = tf.keras.Input(shape=model.input_shape[1:])
    encoded_inputs = tf.keras.layers.Lambda(encode, name="input_quantisation")(inputs)
    outputs = base_model(encoded_inputs)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
231+
def adversarial_logit_pairing(model, paired_model):
    """
    Adversarial Logit Pairing (ALP) defense.

    Builds a trainable copy of ``model`` whose training loss is categorical
    cross-entropy plus the mean squared difference between its outputs and the
    outputs of a frozen copy of ``paired_model`` on the same input batch.

    A Keras loss function only receives ``(y_true, y_pred)`` and never the
    input batch, so the pairing term cannot be computed inside the loss. It is
    attached with ``add_loss`` from a small pass-through layer instead.

    NOTE(review): this pairs two models on identical inputs. Pairing clean and
    adversarial examples, as in the ALP paper, would additionally require the
    caller to feed adversarial batches -- confirm the intended usage.

    Parameters:
    model (tensorflow.keras.Model): The model to defend. It is cloned and its
        weights are copied; the original is not modified. Assumed to have a
        single input tensor -- TODO confirm against callers.
    paired_model (tensorflow.keras.Model): The paired model for logit pairing.
        It is cloned and the clone is frozen, so the caller's object is never
        trained. Must accept the same input and produce the same output shape
        as ``model``.

    Returns:
    defended_model (tensorflow.keras.Model): A model compiled with the Adam
        optimizer, categorical cross-entropy and an accuracy metric, with the
        pairing penalty added as an extra loss. Because it uses a locally
        defined layer, reloading a saved copy needs ``custom_objects``.

    Raises:
    ValueError: If the two models have different output shapes.
    """
    if model.output_shape != paired_model.output_shape:
        raise ValueError(
            "model and paired_model must produce outputs of the same shape, got "
            f"{model.output_shape!r} and {paired_model.output_shape!r}"
        )

    trainable_copy = tf.keras.models.clone_model(model)
    trainable_copy.set_weights(model.get_weights())

    # Freeze a private copy so training never updates the caller's paired_model.
    reference = tf.keras.models.clone_model(paired_model)
    reference.set_weights(paired_model.get_weights())
    reference.trainable = False

    class _LogitPairing(tf.keras.layers.Layer):
        """Pass the first input through and register the pairing penalty."""

        def call(self, inputs):
            logits, reference_logits = inputs
            self.add_loss(tf.reduce_mean(tf.square(logits - reference_logits)))
            return logits

    inputs = tf.keras.Input(shape=model.input_shape[1:])
    logits = trainable_copy(inputs)
    reference_logits = reference(inputs)
    outputs = _LogitPairing(name="logit_pairing")([logits, reference_logits])

    defended_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    defended_model.compile(
        optimizer='adam',
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return defended_model
255+
def spatial_smoothing(model, kernel_size=3):
    """
    Spatial Smoothing defense.

    Wraps a copy of ``model`` so that every input image is passed through a
    stride-1 average-pooling filter with 'SAME' padding before it reaches the
    network, which leaves the spatial size unchanged.

    Parameters:
    model (tensorflow.keras.Model): The model to defend. It is cloned and its
        weights are copied; the original is not modified. Must take a single
        4-D image input; the layout is assumed to be channels-last
        (batch, height, width, channels) -- TODO confirm against callers.
    kernel_size (int): The size of the smoothing kernel (default: 3). A list
        or tuple of ints is forwarded unchanged to ``tf.nn.avg_pool2d``.

    Returns:
    defended_model (tensorflow.keras.Model): An uncompiled model that maps
        ``input -> smoothed input -> cloned model``.

    Raises:
    ValueError: If any kernel dimension is smaller than 1, or if the model
        input is not a single 4-D tensor.
    """
    kernel_dims = kernel_size if isinstance(kernel_size, (list, tuple)) else [kernel_size]
    if any(dim < 1 for dim in kernel_dims):
        raise ValueError(f"kernel_size must be at least 1, got {kernel_size!r}")

    input_shape = model.input_shape
    if isinstance(input_shape, list) or len(input_shape) != 4:
        raise ValueError(
            "spatial_smoothing expects a model with a single 4-D image input, "
            f"got input_shape={input_shape!r}"
        )

    base_model = tf.keras.models.clone_model(model)
    base_model.set_weights(model.get_weights())

    def smooth(x):
        return tf.nn.avg_pool2d(x, ksize=kernel_size, strides=1, padding='SAME')

    # A layer's ``input`` attribute is read-only, so the transformation is
    # wired in by building a new functional model around the clone.
    inputs = tf.keras.Input(shape=input_shape[1:])
    smoothed_inputs = tf.keras.layers.Lambda(smooth, name="input_smoothing")(inputs)
    outputs = base_model(smoothed_inputs)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
0 commit comments