cnn_keras_svd.py

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from keras import backend as K

from sklearn.utils import shuffle

import numpy as np
import pandas as pd

import sys, getopt

# each sample is a vector of `vector_size` SVD components, fed to the
# network as shape (vector_size, 1)
vector_size = 100
epochs = 100
batch_size = 24

input_shape = (vector_size, 1)
filename = "svd_model"

def f1(y_true, y_pred):

    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)

    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
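
# worked example of the batch-wise F1 above: with y_true = [1, 0, 1, 1] and
# rounded y_pred = [1, 0, 0, 1], true_positives = 2, possible_positives = 3
# and predicted_positives = 2, so recall = 2/3, precision = 2/2 = 1 and
# f1 = 2 * (1 * 2/3) / (1 + 2/3) ~= 0.8 (ignoring K.epsilon())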


def generate_model():

    model = Sequential()

    # convolutional front-end kept for reference; the current model flattens
    # the SVD vector and uses fully-connected layers only
    #model.add(Conv1D(128, (10), input_shape=input_shape))
    #model.add(Activation('relu'))
    #model.add(Conv1D(128, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(128, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(64, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(Conv1D(32, (10)))
    #model.add(Activation('relu'))
    #model.add(MaxPooling1D(pool_size=(2)))

    model.add(Flatten(input_shape=input_shape))

    #model.add(Dense(2048))
    #model.add(Activation('relu'))
    #model.add(BatchNormalization())
    #model.add(Dropout(0.3))

    # stack of fully-connected blocks: Dense -> ReLU -> BatchNorm -> Dropout
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Dense(20))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    # single sigmoid output: probability that the sample is noisy
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])

    return model
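
# minimal smoke test for the architecture above (illustrative sketch): a batch
# of random vectors shaped (batch, vector_size, 1) should produce one sigmoid
# score in [0, 1] per sample
#
#   model = generate_model()
#   scores = model.predict(np.random.rand(4, vector_size, 1))
#   assert scores.shape == (4, 1)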


def main():

    if len(sys.argv) <= 1:
        print('Usage: python cnn_keras_svd.py --data filename')
        sys.exit(2)

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:", ["help", "data="])
    except getopt.GetoptError:
        # print help information and exit
        print('Usage: python cnn_keras_svd.py --data filename')
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            print('Usage: python cnn_keras_svd.py --data filename')
            sys.exit()
        elif o in ("-d", "--data"):
            p_datafile = a
        else:
            assert False, "unhandled option"

    ###########################
    # 1. Get and prepare data
    ###########################
    dataset_train = pd.read_csv(p_datafile + '.train', header=None, sep=";")
    dataset_test = pd.read_csv(p_datafile + '.test', header=None, sep=";")

    # first default shuffle of the data
    dataset_train = shuffle(dataset_train)
    dataset_test = shuffle(dataset_test)

    # build datasets with an equal number of occurrences of each class
    # (column 0 holds the label: 1 = noisy, 0 = not noisy)
    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
    nb_noisy_train = len(noisy_df_train.index)

    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
    nb_noisy_test = len(noisy_df_test.index)

    final_df_train = pd.concat([not_noisy_df_train.iloc[0:nb_noisy_train], noisy_df_train])
    final_df_test = pd.concat([not_noisy_df_test.iloc[0:nb_noisy_test], noisy_df_test])

    # shuffle the data another time
    final_df_train = shuffle(final_df_train)
    final_df_test = shuffle(final_df_test)

    final_df_train_size = len(final_df_train.index)
    final_df_test_size = len(final_df_test.index)

    # use the whole balanced set for training: column 0 is the label,
    # the remaining columns are the SVD feature vector
    x_dataset_train = final_df_train.iloc[:, 1:]
    x_dataset_test = final_df_test.iloc[:, 1:]

    y_dataset_train = final_df_train.iloc[:, 0]
    y_dataset_test = final_df_test.iloc[:, 0]

    #######################
    # 2. Getting model
    #######################
    model = generate_model()
    model.summary()

    #######################
    # 3. Fit model (20% of the training data is held out for validation)
    #######################

    # reshape input data to (samples, vector_size, 1)
    x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), vector_size, 1)
    x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), vector_size, 1)

    model.fit(x_dataset_train, y_dataset_train, epochs=epochs, batch_size=batch_size, validation_split=0.20)

    score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)
    print(score)


if __name__ == "__main__":
    main()
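
# example invocation (hypothetical paths): assuming `data/svd.train` and
# `data/svd.test` exist, each line holding one sample as
# `label;v1;v2;...;v100` (label in column 0: 1 = noisy, 0 = not noisy,
# followed by the 100 SVD components):
#
#   python cnn_keras_svd.py --data data/svd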