deep_network_keras_svd.py

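"""Train a fully-connected Keras network on SVD feature vectors to separate
noisy from not-noisy samples.

Reads `<data prefix>.train` / `<data prefix>.test` CSV files (label in the
first column, one feature vector per row), balances both classes, fits the
model, then saves the architecture as JSON and the weights as HDF5.
"""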
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from keras.wrappers.scikit_learn import KerasClassifier
from keras import backend as K

from sklearn.utils import shuffle
from sklearn.metrics import roc_auc_score

import numpy as np
import pandas as pd

from ipfml import processing
import modules.utils.config as cfg

from PIL import Image

import os
import sys
import argparse
import json
import subprocess
import time

def f1(y_true, y_pred):

    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

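# Note: the built-in precision/recall/fmeasure metrics were removed in Keras 2.0
# because batch-wise averages can be misleading; the custom `f1` above is the
# commonly used batch-wise approximation and should be read as such.
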
def generate_model(input_shape):

    model = Sequential()

    # Convolutional variant kept for reference; the current architecture is dense-only.
    #model.add(Conv1D(128, (10), input_shape=input_shape))
    #model.add(Activation('relu'))
    #model.add(Conv1D(128, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(128, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    model.add(Flatten(input_shape=input_shape))

    model.add(Dense(2048))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(20))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    # single sigmoid output for the binary (noisy / not noisy) decision
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])

    return model

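# Minimal usage sketch, assuming a vector of 200 SVD components (hypothetical size):
#   model = generate_model((200, 1))
#   model.summary()
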
def main():

    parser = argparse.ArgumentParser(description="Process deep_network_keras_svd.py parameters")

    parser.add_argument('--data', type=str, help='Data filename prefix to access train and test dataset')
    parser.add_argument('--output', type=str, help='Name of filename to save model into')
    parser.add_argument('--size', type=int, help='Size of input data vector')

    args = parser.parse_args()

    p_datafile = args.data
    p_output_filename = args.output
    p_vector_size = args.size

    epochs = 10
    batch_size = cfg.keras_batch

    input_shape = (p_vector_size, 1)

    ###########################
    # 1. Get and prepare data
    ###########################
    dataset_train = pd.read_csv(p_datafile + '.train', header=None, sep=";")
    dataset_test = pd.read_csv(p_datafile + '.test', header=None, sep=";")

    # default first shuffle of data
    dataset_train = shuffle(dataset_train)
    dataset_test = shuffle(dataset_test)

    # get dataset with equal number of class occurrences
    # (`.ix` is deprecated since pandas 0.20; use positional `.iloc` instead)
    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
    nb_noisy_train = len(noisy_df_train.index)

    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
    nb_noisy_test = len(noisy_df_test.index)

    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
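    # The concat above balances the classes by undersampling: only the first
    # nb_noisy_* rows of the not-noisy frame are kept, which assumes the
    # not-noisy class is the larger of the two.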

    # shuffle data another time
    final_df_train = shuffle(final_df_train)
    final_df_test = shuffle(final_df_test)

    final_df_train_size = len(final_df_train.index)
    final_df_test_size = len(final_df_test.index)

    # use of the whole data set for training
    x_dataset_train = final_df_train.iloc[:, 1:]
    x_dataset_test = final_df_test.iloc[:, 1:]

    y_dataset_train = final_df_train.iloc[:, 0]
    y_dataset_test = final_df_test.iloc[:, 0]

    #######################
    # 2. Getting model
    #######################
    model = generate_model(input_shape)
    model.summary()
    #model = KerasClassifier(build_fn=model, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, verbose=0)

    #######################
    # 3. Fit model : a 20% validation split is held out to monitor training
    #######################

    # reshape input data
    x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), p_vector_size, 1)
    x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), p_vector_size, 1)
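    # Each sample becomes a (p_vector_size, 1) column so the same input_shape
    # serves both the dense path (via Flatten) and the commented-out Conv1D
    # layers, which expect a trailing channel dimension.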
    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)

    score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)

    if not os.path.exists(cfg.saved_models_folder):
        os.makedirs(cfg.saved_models_folder)

    # save the model architecture as JSON and the weights as HDF5
    model_output_path = os.path.join(cfg.saved_models_folder, p_output_filename + '.json')
    json_model_content = model.to_json()

    with open(model_output_path, 'w') as f:
        print("Model saved into ", model_output_path)
        json.dump(json_model_content, f, indent=4)

    model.save_weights(model_output_path.replace('.json', '.h5'))
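    # Reloading sketch (assumes the files produced above): `model.to_json()`
    # already returns a JSON string and `json.dump` wraps it once more, so a
    # loader must unwrap it first, e.g.:
    #   with open(model_output_path, 'r') as f:
    #       model = model_from_json(json.load(f))
    #   model.load_weights(model_output_path.replace('.json', '.h5'))
    # (model_from_json comes from keras.models.)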

    # Report results obtained from the model
    y_test_prediction = model.predict(x_dataset_test)

    print("Metrics : ", model.metrics_names)
    print("Prediction : ", score)
    print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))


if __name__ == "__main__":
    main()
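
# Example invocation (hypothetical prefix, size, and output name):
#   python deep_network_keras_svd.py --data data/svd_dataset --size 200 --output svd_model
# reads data/svd_dataset.train and data/svd_dataset.test, then writes
# svd_model.json and svd_model.h5 under cfg.saved_models_folder.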