@@ -0,0 +1,203 @@
+import numpy as np
+import pandas as pd
+import sys, os, argparse
+import json
+
+import cv2
+
+from sklearn.utils import shuffle
+
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Sequential
+from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
+from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras import backend as K
+from keras.utils import plot_model
+
+from modules.utils import config as cfg
+from sklearn.metrics import roc_auc_score
+
+img_width, img_height = 200, 200
+batch_size = 32
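+# input images are assumed to be 200x200 grayscale; they are loaded with
+# cv2.IMREAD_GRAYSCALE below and reshaped to match `input_shape`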
+
+# 1 because we have 1 color channel (grayscale)
+if K.image_data_format() == 'channels_first':
+    input_shape = (1, img_width, img_height)
+else:
+    input_shape = (img_width, img_height, 1)
+
+
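+# generate_model builds a small CNN: three Conv2D/ReLU/MaxPooling blocks,
+# then five Dense/ReLU/BatchNormalization/Dropout blocks, and a single
+# sigmoid output unit for binary (noisy vs. not noisy) classification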
+def generate_model(_input_shape):
+
+    model = Sequential()
+
+    model.add(Conv2D(60, (2, 2), input_shape=_input_shape))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Conv2D(40, (2, 2)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Conv2D(20, (2, 2)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Flatten())
+
+    model.add(Dense(140))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(120))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(80))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(40))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(20))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(1))
+    model.add(Activation('sigmoid'))
+
+    model.compile(loss='binary_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy'])
+
+    return model
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Train Keras model and save it into .json file")
+
+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)')
+    parser.add_argument('--output', type=str, help='output file name desired for model (without .json extension)')
+
+    args = parser.parse_args()
+
+    p_data_file = args.data
+    p_output = args.output
+
+    ########################
+    # 1. Get and prepare data
+    ########################
+    print("Preparing data...")
+    dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
+    dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
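+    # each ';'-separated line is expected to hold the class label in column 0
+    # and the image file path in column 1 (no header row)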
+
+    # first shuffle of the data
+    dataset_train = shuffle(dataset_train)
+    dataset_test = shuffle(dataset_test)
+
+    print("Reading all image data...")
+    dataset_train[1] = dataset_train[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE).reshape(input_shape))
+    dataset_test[1] = dataset_test[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE).reshape(input_shape))
+
+
+    # balance the dataset: keep only as many not noisy samples as there are
+    # noisy ones so both classes have an equal number of occurrences
+    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
+    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
+    nb_noisy_train = len(noisy_df_train.index)
+
+    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
+    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
+    nb_noisy_test = len(noisy_df_test.index)
+
+    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+
+    # shuffle data another time
+    final_df_train = shuffle(final_df_train)
+    final_df_test = shuffle(final_df_test)
+
+    final_df_train_size = len(final_df_train.index)
+    final_df_test_size = len(final_df_test.index)
+
+    # use the whole balanced data set for training
+    x_dataset_train = final_df_train.iloc[:, 1:]
+    x_dataset_test = final_df_test.iloc[:, 1:]
+
+    y_dataset_train = final_df_train.iloc[:, 0]
+    y_dataset_test = final_df_test.iloc[:, 0]
+
+    # keras expects numpy arrays, so extract the image array stored in each row
+    x_data_train = []
+    for item in x_dataset_train.values:
+        #print("Item is here", item)
+        x_data_train.append(item[0])
+
+    x_data_train = np.array(x_data_train)
+
+    # do the same for the test set, used later for evaluation and prediction
+    x_data_test = []
+    for item in x_dataset_test.values:
+        x_data_test.append(item[0])
+
+    x_data_test = np.array(x_data_test)
+    print("End of loading data..")
+
+    print(x_data_train.shape)
+    print(x_data_train[0])
+
+    #######################
+    # 2. Getting model
+    #######################
+
+    model = generate_model(input_shape)
+    model.summary()
+
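+    # 20% of the balanced training set is held out as validation data;
+    # epochs and batch size are taken from the project config module (cfg)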
+    model.fit(x_data_train, y_dataset_train.values, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
+
+    score = model.evaluate(x_data_test, y_dataset_test.values, batch_size=cfg.keras_batch)
+
+    if not os.path.exists(cfg.saved_models_folder):
+        os.makedirs(cfg.saved_models_folder)
+
+    # save the model architecture into a JSON file (weights go into a separate HDF5 file)
+    model_output_path = os.path.join(cfg.saved_models_folder, p_output + '.json')
+    json_model_content = model.to_json()
+
+    with open(model_output_path, 'w') as f:
+        print("Model saved into ", model_output_path)
+        json.dump(json_model_content, f, indent=4)
+
+    model.save_weights(model_output_path.replace('.json', '.h5'))
+
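+    # minimal sketch of how the saved model could be reloaded later (assuming
+    # the same folder layout; kept as a comment, it is not executed here):
+    #   from keras.models import model_from_json
+    #   with open(model_output_path, 'r') as f:
+    #       loaded_model = model_from_json(json.load(f))
+    #   loaded_model.load_weights(model_output_path.replace('.json', '.h5'))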
+    # display results obtained from the model
+    y_test_prediction = model.predict(x_data_test)
+    print("Metrics : ", model.metrics_names)
+    print("Evaluation score : ", score)
+    print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))
+
+if __name__ == "__main__":
+    main()