deep_network_keras_svd.py

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D  # used only by the commented-out convolutional variant
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from keras import backend as K
# from keras.wrappers.scikit_learn import KerasClassifier  # only needed for the commented-out wrapper in main()

from sklearn.utils import shuffle
from sklearn.metrics import roc_auc_score

import numpy as np
import pandas as pd

import modules.utils.config as cfg

import os
import argparse
def f1(y_true, y_pred):

    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)

    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
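
# Sanity check (sketch, assuming a Keras backend where K.variable / K.eval are
# available): a perfect prediction should give an F1 score of ~1.0.
#
#   y_true = K.variable(np.array([[1.], [0.], [1.]]))
#   y_pred = K.variable(np.array([[1.], [0.], [1.]]))
#   print(K.eval(f1(y_true, y_pred)))   # ~1.0 (up to K.epsilon())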
def generate_model(input_shape):

    model = Sequential()

    # Convolutional variant kept for reference (currently disabled):
    #model.add(Conv1D(128, (10), input_shape=input_shape))
    #model.add(Activation('relu'))
    #model.add(Conv1D(128, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(128, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    # Fully-connected stack: 2048 -> 1024 -> 512 -> 256 -> 128 -> 20 -> 1
    model.add(Flatten(input_shape=input_shape))

    model.add(Dense(2048))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Dense(20))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    # single sigmoid output for binary (noisy / not noisy) classification
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])

    return model
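
# Example (sketch, hypothetical vector size): build the network for
# 200-component input vectors and print its layer summary.
#
#   model = generate_model((200, 1))
#   model.summary()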
def main():

    parser = argparse.ArgumentParser(description="Process deep_network_keras_svd.py parameters")

    parser.add_argument('--data', type=str, help='Data filename prefix to access train and test dataset')
    parser.add_argument('--output', type=str, help='Name of filename to save model into')
    parser.add_argument('--size', type=int, help='Size of input data vector')

    args = parser.parse_args()

    p_datafile = args.data
    p_output_filename = args.output
    p_vector_size = args.size

    batch_size = cfg.keras_batch
    input_shape = (p_vector_size, 1)
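
    # Usage sketch (the file prefix, output name, and size below are hypothetical):
    #
    #   python deep_network_keras_svd.py --data data/svd_dataset --output svd_model --size 200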
    ###########################
    # 1. Get and prepare data
    ###########################
    dataset_train = pd.read_csv(p_datafile + '.train', header=None, sep=";")
    dataset_test = pd.read_csv(p_datafile + '.test', header=None, sep=";")

    # default first shuffle of data
    dataset_train = shuffle(dataset_train)
    dataset_test = shuffle(dataset_test)

    # get a dataset with an equal number of occurrences per class
    # (label is in column 0: 1 = noisy, 0 = not noisy)
    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
    nb_noisy_train = len(noisy_df_train.index)

    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
    nb_noisy_test = len(noisy_df_test.index)

    # undersample the majority class down to the noisy-class size
    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])

    # shuffle data another time
    final_df_train = shuffle(final_df_train)
    final_df_test = shuffle(final_df_test)

    final_df_train_size = len(final_df_train.index)
    final_df_test_size = len(final_df_test.index)

    # use the whole balanced data set: features in columns 1..n, label in column 0
    x_dataset_train = final_df_train.iloc[:, 1:]
    x_dataset_test = final_df_test.iloc[:, 1:]

    y_dataset_train = final_df_train.iloc[:, 0]
    y_dataset_test = final_df_test.iloc[:, 0]
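
    # Expected input layout (sketch): each row of <prefix>.train / <prefix>.test is
    # ';'-separated with the class label first and the feature vector after it,
    # e.g. for --size 4 (values are illustrative):
    #
    #   1;0.98;0.75;0.43;0.12
    #   0;0.99;0.81;0.52;0.20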
    #######################
    # 2. Getting model
    #######################
    model = generate_model(input_shape)
    model.summary()

    #model = KerasClassifier(build_fn=model, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, verbose=0)

    #######################
    # 3. Fit model, monitoring it on a 20% validation split
    #######################

    # reshape input data to the (samples, vector size, 1) shape the model expects
    x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), p_vector_size, 1)
    x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), p_vector_size, 1)

    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=batch_size)
    score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)

    if not os.path.exists(cfg.saved_models_folder):
        os.makedirs(cfg.saved_models_folder)

    # save the model architecture as JSON and its weights as HDF5
    model_output_path = os.path.join(cfg.saved_models_folder, p_output_filename + '.json')
    json_model_content = model.to_json()

    with open(model_output_path, 'w') as f:
        # to_json() already returns a JSON string, so write it verbatim
        # (json.dump would wrap it in an extra layer of quoting)
        f.write(json_model_content)

    print("Model saved into ", model_output_path)

    model.save_weights(model_output_path.replace('.json', '.h5'))

    # report test-set results obtained from the model
    y_test_prediction = model.predict(x_dataset_test)
    print("Evaluation [loss, accuracy, f1] : ", score)
    print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))
if __name__ == "__main__":
    main()