@@ -4,6 +4,7 @@ import argparse
 import json
 import numpy as np
 import pandas as pd
+import logging
 
 # models imports
 from keras.preprocessing.image import ImageDataGenerator
@@ -12,9 +13,10 @@ from keras.layers import Conv1D, MaxPooling1D
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
 from keras.wrappers.scikit_learn import KerasClassifier
 from keras import backend as K
+from keras.callbacks import Callback
 
 from sklearn.utils import shuffle
-from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
 
 # modules and config imports
 import custom_config as cfg
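
As a quick aside (toy values, not from the patch): the two newly imported sklearn metrics expect hard 0/1 labels, while `roc_auc_score` is most informative when fed the raw sigmoid scores.

```python
# Minimal sketch of how the newly imported sklearn metrics behave.
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score

y_true = [0, 1, 1, 0, 1]            # toy ground truth
y_prob = [0.2, 0.8, 0.6, 0.4, 0.3]  # toy sigmoid outputs
y_pred = [0 if p < 0.5 else 1 for p in y_prob]

print(accuracy_score(y_true, y_pred))  # fraction of correct labels
print(f1_score(y_true, y_pred))        # harmonic mean of precision/recall
print(roc_auc_score(y_true, y_prob))   # AUC is best computed on raw scores
```
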
@@ -50,6 +52,29 @@ def f1(y_true, y_pred):
     recall = recall(y_true, y_pred)
     return 2*((precision*recall)/(precision+recall+K.epsilon()))
 
+
+class IntervalEvaluation(Callback):
+    def __init__(self, validation_data=(), interval=1):
+        super(IntervalEvaluation, self).__init__()
+
+        self.interval = interval
+        self.X_val, self.y_val = validation_data
+
+    def on_epoch_end(self, epoch, logs=None):
+        if epoch % self.interval == 0:
+            y_pred = self.model.predict_proba(self.X_val, verbose=0)
+            y_pred = [0 if y < 0.5 else 1 for y in y_pred]
+            auc_score = roc_auc_score(self.y_val, y_pred)
+            acc_score = accuracy_score(self.y_val, y_pred)
+            f1_test_score = f1_score(self.y_val, y_pred)
+
+            print("------------------------------")
+            print("[test dataset] for epoch {:d}".format(epoch + 1))
+            print("ROC AUC : {:.6f}".format(auc_score))
+            print("ACCURACY: {:.6f}".format(acc_score))
+            print("F1 score: {:.6f}".format(f1_test_score))
+            print("------------------------------")
+
 def generate_model(input_shape):
 
     model = Sequential()
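
For context, a Keras `Callback` like `IntervalEvaluation` only needs to be passed to `fit()`; Keras binds `self.model` automatically before training starts. A minimal usage sketch with invented held-out arrays (`X_hold`/`y_hold` are illustrative names, not from the patch):

```python
# Sketch: wiring IntervalEvaluation into training on toy data.
import numpy as np

X_hold = np.random.rand(100, 32, 1)      # hypothetical held-out features
y_hold = np.random.randint(0, 2, 100)    # hypothetical held-out labels

ival = IntervalEvaluation(validation_data=(X_hold, y_hold), interval=2)
# Keras sets ival.model when fit() runs, so on_epoch_end can score the
# held-out split every `interval` epochs:
# model.fit(X_train, y_train, epochs=10, callbacks=[ival])
```
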
@@ -86,41 +111,41 @@ def generate_model(input_shape):
 
     model.add(Flatten(input_shape=input_shape))
 
-    model.add(Dense(2048))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.2))
+    # model.add(Dense(2048))
+    # model.add(Activation('relu'))
+    # model.add(BatchNormalization())
+    # model.add(Dropout(0.2))
 
     model.add(Dense(1024))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.2))
+    model.add(Dropout(0.4))
 
     model.add(Dense(512))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.3))
+    model.add(Dropout(0.4))
 
     model.add(Dense(256))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.3))
+    model.add(Dropout(0.4))
 
     model.add(Dense(128))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.3))
+    model.add(Dropout(0.4))
 
     model.add(Dense(20))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.3))
+    model.add(Dropout(0.4))
 
     model.add(Dense(1))
     model.add(Activation('sigmoid'))
 
     model.compile(loss='binary_crossentropy',
-                  optimizer='adam',
+                  optimizer='rmsprop',
                   metrics=['accuracy', f1])
 
     return model
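
After this change every hidden stage follows the same Dense → ReLU → BatchNorm → Dropout(0.4) pattern, so the stack could equivalently be built in a loop. A sketch of that construction (same resulting architecture; `build_head` is an illustrative name, not part of the patch):

```python
# Equivalent loop-based construction of the fully connected head.
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization

def build_head(input_shape, widths=(1024, 512, 256, 128, 20), drop=0.4):
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    for width in widths:
        model.add(Dense(width))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(drop))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model
```
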
@@ -155,30 +180,46 @@ def main():
     dataset_test = shuffle(dataset_test)
 
     # get dataset with equal number of classes occurrences
-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
+    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
+    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
     nb_noisy_train = len(noisy_df_train.index)
+    nb_not_noisy_train = len(not_noisy_df_train.index)
 
-    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
-    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
+    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
+    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
     nb_noisy_test = len(noisy_df_test.index)
+    nb_not_noisy_test = len(not_noisy_df_test.index)
 
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+    final_df_train = pd.concat([not_noisy_df_train, noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test, noisy_df_test])
 
     # shuffle data another time
     final_df_train = shuffle(final_df_train)
     final_df_test = shuffle(final_df_test)
 
-    final_df_train_size = len(final_df_train.index)
-    final_df_test_size = len(final_df_test.index)
-
     # use of the whole data set for training
-    x_dataset_train = final_df_train.ix[:,1:]
-    x_dataset_test = final_df_test.ix[:,1:]
+    x_dataset_train = final_df_train.iloc[:, 1:]
+    x_dataset_test = final_df_test.iloc[:, 1:]
+
+    y_dataset_train = final_df_train.iloc[:, 0]
+    y_dataset_test = final_df_test.iloc[:, 0]
+
+    noisy_samples = nb_noisy_test + nb_noisy_train
+    not_noisy_samples = nb_not_noisy_test + nb_not_noisy_train
+
+    total_samples = noisy_samples + not_noisy_samples
+
+    print('noisy', noisy_samples)
+    print('not_noisy', not_noisy_samples)
+    print('total', total_samples)
+
+    class_weight = {
+        0: noisy_samples / float(total_samples),
+        1: not_noisy_samples / float(total_samples)
+    }
+
+    print(class_weight)
 
-    y_dataset_train = final_df_train.ix[:,0]
-    y_dataset_test = final_df_test.ix[:,0]
 
     #######################
     # 2. Getting model
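
The `class_weight` dict above assigns each class a weight equal to the *other* class's share of the data, so the minority class is up-weighted during training. A toy check of that inverse-frequency scheme (counts invented for illustration, assuming noisy is the minority class):

```python
# Toy check of the inverse-frequency class weighting used above.
noisy_samples = 300       # hypothetical class-1 count (minority)
not_noisy_samples = 700   # hypothetical class-0 count (majority)
total_samples = noisy_samples + not_noisy_samples

class_weight = {
    0: noisy_samples / float(total_samples),      # 0.3 -> majority down-weighted
    1: not_noisy_samples / float(total_samples),  # 0.7 -> minority up-weighted
}
print(class_weight)  # {0: 0.3, 1: 0.7}
```
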
@@ -196,7 +237,9 @@ def main():
     x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), p_vector_size, 1)
     x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), p_vector_size, 1)
 
-    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
+    ival = IntervalEvaluation(validation_data=(x_dataset_test, y_dataset_test), interval=1)
+
+    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, callbacks=[ival], class_weight=class_weight)
 
     score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)
@@ -215,8 +258,11 @@ def main():
 
     # Save results obtained from model
     y_test_prediction = model.predict(x_dataset_test)
+    y_test_prediction = [0 if y < 0.5 else 1 for y in y_test_prediction]
+
     print("Metrics : ", model.metrics_names)
-    print("Prediction : ", score)
+    print("ACC score : ", accuracy_score(y_dataset_test, y_test_prediction))
+    print("F1 score : ", f1_score(y_dataset_test, y_test_prediction))
     print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))
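
One detail worth flagging in this hunk: after the list comprehension, `roc_auc_score` receives hard 0/1 labels rather than probabilities, which collapses the ROC curve to a single operating point. A hedged alternative (reusing the script's `model`, `x_dataset_test`, and `y_dataset_test`; vectorized thresholding) that keeps the raw scores for AUC:

```python
# Sketch: vectorized thresholding that keeps probabilities for ROC AUC.
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score

y_test_probs = model.predict(x_dataset_test).ravel()   # raw sigmoid scores
y_test_prediction = (y_test_probs >= 0.5).astype(int)  # hard 0/1 labels

print("ACC score : ", accuracy_score(y_dataset_test, y_test_prediction))
print("F1 score  : ", f1_score(y_dataset_test, y_test_prediction))
print("ROC AUC   : ", roc_auc_score(y_dataset_test, y_test_probs))
```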