-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOSACI.R
More file actions
352 lines (262 loc) · 13.1 KB
/
OSACI.R
File metadata and controls
352 lines (262 loc) · 13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
##################################################################################################
## Feature Selection Using OSACI Algorithms: OSACI-Init, OSACI-Update, and OSACI-Init_Update
##
## Paper: "New feature selection methods based on opposition-based learning and
## self-adaptive cohort intelligence for predicting patient no-shows"
## Authors: Mohammed Aladeemy, Linda Adwan, Amy Booth, Mohammad T. Khasawneh, and Srikanth Poranki
## Corresponding author: Mohammed Aladeemy (mohammed.aladeemy@outlook.com)
## DOI: https://doi.org/10.1016/j.asoc.2019.105866
##################################################################################################
## Load required packages
library(pROC)
library(caret)
library(doParallel)
# Training and testing sets (Source: UCI repository
# https://archive.ics.uci.edu/ml/datasets/Parkinson+Dataset+with+replicated+acoustic+features+)
Train <- read.table("Parkinson Train Set.txt", header = TRUE)
Test <- read.table("Parkinson Test Set.txt", header = TRUE)
# Split features and class of training and testing sets
TrainClass <- Train[ ,ncol(Train)] # Training class
TrainFeatures <- Train[ ,-ncol(Train)] # Training set without class
TestClass <- Test[ ,ncol(Test)] # Testing class
TestFeatures <- Test[ ,-ncol(Test)] # Testing set without class
# Global training parameters for "train" function
TrainControl <- trainControl(method = "repeatedcv",
number = 5,
repeats = 3,
classProbs = TRUE,
allowParallel = TRUE,
search = "grid",
verboseIter = FALSE,
returnData = FALSE,
seeds = NULL,
summaryFunction = twoClassSummary
)
# Fitness function
ObjFun <- function(Solutions, d, lambda) {
ObjFunValue <- data.frame(ObjFunValue = rep(NA, (nrow(Solutions))))
Solutions <- foreach(j = seq_len(nrow(Solutions)), .combine = 'rbind', .verbose = FALSE) %dopar% {
# Repair any solution with no selected features by selecting one feature randomly
if (sum(Solutions[j, ]) == 0) {
Solutions[j, sample(seq_len(d), size = 1)] <- 1
}
# Training set with selected feature subset
SelF <- as.data.frame(TrainFeatures[ ,which(Solutions[j, ] == 1)])
# Train Decision Tree (DT) on training set with selected feature subset
DT_model <- train(SelF, TrainClass, method = "rpart", metric = "ROC",
parms = list(split = "information"),
tuneLength = 10, trControl = TrainControl)
# Fitness value (behavior)
ObjFunValue[j, ] <- ((lambda * max(DT_model$results$ROC)) + ((1 - lambda) * (d - ncol(SelF))/d))
return(cbind(t(Solutions[j, ]), ObjFunValue[j, ]))
}
return(Solutions)
}
######################################################################################
## OSACI algorithms
OSACI <- function(OSACIVariant, L, S, d, Q, k = 2, EPSILON = 0, TauMax = Inf, lambda = 0.9) {
## Check input parameters
if (is.null(OSACIVariant) || !OSACIVariant %in% c("OSACI-Init", "OSACI-Update", "OSACI-Init_Update")) {
stop('OSACI variation must be "OSACI-Init", "OSACI-Update", or "OSACI-Init_Update"')
}
if (!is.numeric(S) || S < 2 || missing(S)) {
stop("No. of candidates must be greater than 2")
}
if ((S %% 2) != 0) {
stop("No. of candidates must be even")
}
if (!is.numeric(d) || d < 2 || missing(d)) {
stop("No. of original features must be specified!")
}
if (!is.numeric(Q) || Q < 2 || missing(Q)) {
stop("No. of quality variations must be greater than 2")
}
if (!is.numeric(L) || L < 1 || missing(L)) {
stop("Max. no. of learning attempts must be greater than 0")
}
if (!is.numeric(lambda) || is.null(lambda) || lambda > 1 || lambda < 0) {
stop("lambda value must be less than 1 and greater than 0")
}
# Allocate matrices to save "for" loops outputs
Candidates <- matrix(NA, nrow = S, ncol = d)
BestObjVal <- matrix(NA, nrow = L, ncol = 2)
Results <- matrix(NA, nrow = L, ncol = d + 1)
Behaviors <- matrix(NA, nrow = Q * S, ncol = d)
## Generate initial candidates
# OBL initialization (Algorithm 1)
if (OSACIVariant %in% c("OSACI-Init", "OSACI-Init_Update")) {
# Generate S/2 random Candidates
for (i in seq_len(S/2)) {
Candidates[i, ] <- round(runif(d, 0, 1))
}
# Genereate opposite candidates of the randomly generated (S/2) solutions
for (j in seq_len(S/2)) {
Candidates[(S/2) + j, ] <- 1 - Candidates[j, ]
}
}
# Random initialization
if (OSACIVariant %in% c("OSACI-Update")) {
for (i in seq_len(S)) {
Candidates[i, ] <- round(runif(d, 0, 1))
}
}
# Evaluate initial Candidates
Candidates <- ObjFun(Candidates, d, lambda)
# Initiate counters
Tau <- 0
l <- 1
## Procedure
repeat { # While l < L
## Update the cohort using OBL Update (Algorithm 2)
if (OSACIVariant %in% c("OSACI-Update", "OSACI-Init_Update")) {
# Sort candidates in descending order based on their fitness values
Candidates <- Candidates[order(Candidates[ ,(d + 1)], decreasing = TRUE), ]
# Allocate memory for S/2 opposite candidates
OBLCandidates <- Candidates[seq_len(S/2), -(d + 1)]
# Allocate memory for S/2 mutant candidates
MutantCandidates <- Candidates[seq(from = (S/2) + 1, to = S), -(d + 1)]
for(j in seq_len(S)) { # Generate S/2 Type-I opposite candidates
if (j <= S/2){
OBLCandidates[j, ] <- 1 - OBLCandidates[j, ]
}
if (j > S/2) { # Generate S/2 mutant candidates
jj <- sample(seq_len(d), size = (1/S) * d)
MutantCandidates[j-(S/2), jj] <- 1 - MutantCandidates[j-(S/2), jj]
}
}
# Evaluate opposite candidates
OBLCandidates <- ObjFun(OBLCandidates, d, lambda)
# Select the fittest S/2 candidates from S/2 original candidates and their opposites
OBLCandidates <- rbind(Candidates[seq_len(S/2), ], OBLCandidates)
Candidates[seq_len(S/2), ] <- OBLCandidates[order(OBLCandidates[ ,ncol(OBLCandidates)], decreasing = TRUE), ][seq_len(S/2), ]
# Evaluate mutant candidates
Candidates[seq(from = (S/2) + 1, to = S), ] <- ObjFun(MutantCandidates, d, lambda)
}
## Update the cohort in OSACI-Init
if (OSACIVariant %in% c("OSACI-Init")) {
# Sort candidates in ascending order based on their fitness values to keep the fittest candidate in last row
Candidates <- Candidates[order(Candidates[ ,(d + 1)], decreasing = FALSE), ]
# Allocate memory for a set of S - 1 mutant candidates
MutantCandidates <- Candidates[seq_len(S - 1), -(d + 1)]
# Select S - 1 candidates using tournament selection
for (s in seq_len(S - 1)) {
randsel <- sample(seq_len(S), size = k)
MutantCandidates[s, ] <- Candidates[randsel[which.max(Candidates[randsel, (d + 1)])], -(d + 1)]
}
# Mutation with rate of 1/S
for (j in seq_len(S - 1)) {
jj <- sample(seq_len(d), size = (1/S) * d)
MutantCandidates[j, jj] <- 1 - MutantCandidates[j, jj]
}
# Evaluate mutant candidates
Candidates[-S, ] <- ObjFun(MutantCandidates, d, lambda)
}
# Allocate memory for selected candidates
SelectedCandidates <- Candidates[order(Candidates[ ,(d + 1)], decreasing = FALSE), ]
## Each candidate selects a candidate using tournament selection
for (s in seq_len(S)) {
randsel <- sample(seq_len(S), size = k)
SelectedCandidates[s, ] <- Candidates[randsel[which.max(Candidates[randsel, (d + 1)])], ]
}
# Each candidate samples Q solutions (qualities) from its neighborhood
for (s in seq_len(S)) {
lo <- (s - 1) * Q
# Update mutation rate
ms_l <- max(1, ceiling(d * (SelectedCandidates[which.max(SelectedCandidates[ ,(d + 1)]) ,(d + 1)] - SelectedCandidates[s ,(d + 1)])))
for (kk in seq_len(Q)) {
jj <- sample(seq_len(d), size = ms_l, replace = FALSE)
Behaviors[(lo + kk), jj] <- 1 - SelectedCandidates[s, jj]
Behaviors[(lo + kk), -jj] <- SelectedCandidates[s, -c(jj, (d + 1))]
}
}
# Evaluate solutions sampled from each candidate's neighborhood
Behaviors <- ObjFun(Behaviors, d, lambda)
# Update current candidates
kk <- 0
for (s in seq_len(S)) {
lo <- (s - 1) * Q
kk <- kk + Q
# The fittest solution sampled from neighborhood of candidate s
BestSampled_Q <- Behaviors[which.max(Behaviors[(lo + 1):kk, (d + 1)]) + lo, ]
Candidates[s, ] <- BestSampled_Q
# if (s < S) {
# Candidates[s, ] <- BestSampled_Q
# } else {
# Candidates[s, BestSampled_Q[(d + 1)] >= Candidates[s, (d + 1)]] <- BestSampled_Q
# }
}
# Resize Behaviors matrix
Behaviors <- Behaviors[ ,-ncol(Behaviors)]
## Save the best solution obtained in the current learning attempt
# Max. and min. objective values to evaluate cohort saturation
BestObjVal[l, 1] <- Candidates[which.max(Candidates[ ,(d + 1)]), (d + 1)]
BestObjVal[l, 2] <- Candidates[which.min(Candidates[ ,(d + 1)]), (d + 1)]
# Save the fittest solution in the current learning attempt
if (l == 1) {
Results[l, ] <- Candidates[which.max(Candidates[ ,(d + 1)]), ]
}
if (Results[(l - 1), (d + 1)] <= BestObjVal[l, 1] && l > 1) {
Results[l, ] <- Candidates[which.max(Candidates[ ,(d + 1)]), ]
}
if (Results[(l - 1), (d + 1)] > BestObjVal[l, 1] && l > 1) {
Results[l, ] <- Results[(l - 1), ]
}
## Cohort saturation reached?
# Update Tau
if (l > 1 && abs(BestObjVal[l, 1] - BestObjVal[(l - 1), 1]) <= EPSILON) {
Tau <- Tau + 1
} else {
Tau <- 0
}
## Stopping criteria
# Terminate if cohort saturation is reached
if (Tau == TauMax) break()
# Terminate if max. no. of learning attempts is reached
if (l == L) break()
# Update counter
l <- l + 1
} # End of "repeat" loop
return(Results)
} # End of OSACI function
######################################################################################
## Create a cluster for parallel computing
Numclust <- detectCores(logical = TRUE) - 1
clust <- makeCluster(Numclust, type = "SOCK")
registerDoParallel(clust)
varlist <- ls(envir = parent.frame(), all.names = TRUE)
parallel::clusterExport(clust, varlist = varlist, envir = parent.frame())
invisible(lapply(.packages(), function(pkg)
parallel::clusterCall(clust, library, package = pkg, character.only = TRUE))[1])
# Start timer for CPU time
StartTime <- proc.time()
Results <- OSACI(OSACIVariant = "OSACI-Init", # "OSACI-Init", "OSACI-Update", or "OSACI-Init_Update"
L = 50, # Max. no. of learning attempts
S = 10, # No. of candidates
d = ncol(TrainFeatures), # No. of features
Q = 10, # No. of quality variations (solutions sampled from neighborhood)
k = 2, # Tournament size (Default = 2)
EPSILON = 0, # Convergence tolerence (Default = 0)
TauMax = Inf, # No. of successive learning attempts for cohort saturation (Default = Inf)
lambda = 0.9 # Trade-off factor in fitness function (Default = 0.9)
)
# CPU time
RunTime <- (proc.time() - StartTime)[3]
# Best solution
Sol <- Results[which.max(Results[ ,ncol(Train)]), ]
# Selected feature subset
SelectedFeatures <- which(Sol[-c((length(Sol) - 1), (length(Sol)))] == 1)
# Train DT model on training set using selected feature subset
DTmodel <- train(as.data.frame(TrainFeatures[ ,SelectedFeatures]), TrainClass,
method = "rpart", metric = "ROC", parms = list(split = "information"),
tuneLength = 10, trControl = TrainControl)
## Performance measures on testing set
TestPred <- confusionMatrix(data = predict(DTmodel, TestFeatures[ ,SelectedFeatures]),
reference = factor(TestClass), positive = levels(factor(TestClass))[2])
# Sensitivity (true positive rate)
SensTest <- round(TestPred$byClass[1], 3)[1]
# Specificity (true negative rate)
SpecTest <- round(TestPred$byClass[2], 3)[1]
# AUC
ROCTest <- round(roc(as.factor(TestClass), as.numeric(predict(DTmodel, Test[ ,SelectedFeatures])))$auc[1], 3)
## Shut down workers of the cluster
stopCluster(clust)