# train_model_keras.py

import os
import argparse
import json

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout

import modules.config as cfg
import modules.metrics as metrics

def train(_data_file, _model_name):
    # load the dataset: first column is the target, remaining columns are the features
    dataset = np.loadtxt(_data_file, delimiter=';')
    y = dataset[:, 0]
    X = dataset[:, 1:]
    print(X.shape)
    _, nb_elem = X.shape
    y = np.reshape(y, (-1, 1))

    # scale features and target with separate scalers (refitting a single
    # scaler on y would make it transform X with the wrong parameters)
    scaler_x = MinMaxScaler().fit(X)
    scaler_y = MinMaxScaler().fit(y)
    xscale = scaler_x.transform(X)
    yscale = scaler_y.transform(y)

    X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)
    # define keras NN structure
    model = Sequential()
    model.add(Dense(200, input_dim=nb_elem, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='linear'))
    model.summary()
    # Set expected metrics
    # TODO : add coefficients of determination as metric ? Or always use MSE/MAE
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
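    # One possible answer to the TODO above: Keras accepts custom metric
    # functions, so R^2 could be tracked during training. This is only a
    # sketch, not part of the original pipeline (it assumes
    # `from keras import backend as K` and would be passed as
    # metrics=['mse', 'mae', r2_keras] in model.compile()):
    # def r2_keras(y_true, y_pred):
    #     ss_res = K.sum(K.square(y_true - y_pred))
    #     ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
    #     return 1 - ss_res / (ss_tot + K.epsilon())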
    history = model.fit(X_train, y_train, epochs=150, batch_size=50, verbose=1, validation_split=0.2)
    # evaluate the model on the held-out test split
    print(history.history.keys())
    y_predicted = model.predict(X_test)
    len_shape, _ = y_predicted.shape
    y_predicted = y_predicted.reshape(len_shape)
    coeff = metrics.coefficient_of_determination(y_test, y_predicted)
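    # Note: y_test and y_predicted are still in scaled space here; to report
    # predictions in the original units, the target scaler could be inverted
    # (sketch): scaler_y.inverse_transform(y_predicted.reshape(-1, 1))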
    # save the model into json/HDF5 files
    if not os.path.exists(cfg.saved_models_folder):
        os.makedirs(cfg.saved_models_folder)
    model_output_path = os.path.join(cfg.saved_models_folder, _model_name + '.json')
    json_model_content = model.to_json()
    with open(model_output_path, 'w') as f:
        # model.to_json() already returns a JSON string; re-parse it so that
        # json.dump writes valid pretty-printed JSON instead of a
        # double-encoded string
        json.dump(json.loads(json_model_content), f, indent=4)
    print("Model saved into", model_output_path)
    model.save_weights(model_output_path.replace('.json', '.h5'))
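    # Loading the model back would mirror this save (sketch, assuming
    # `from keras.models import model_from_json` on the consumer side):
    # with open(model_output_path) as f:
    #     loaded = model_from_json(f.read())
    # loaded.load_weights(model_output_path.replace('.json', '.h5'))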
    # append the score to the global_result.csv file
    with open(cfg.global_result_filepath, "a") as f:
        f.write(_model_name + ';' + str(len(y)) + ';' + str(coeff[0]) + ';\n')
    # Save a plot of the training history using the model name
    plt.figure(figsize=(30, 22))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss', fontsize=20)
    plt.ylabel('loss', fontsize=16)
    plt.xlabel('epoch', fontsize=16)
    plt.legend(['train', 'validation'], loc='upper left', fontsize=16)
    plt.savefig(model_output_path.replace('.json', '.png'))

def main():
    parser = argparse.ArgumentParser(description="Train a model and save it")
    parser.add_argument('--data', type=str, help='Filename of the dataset')
    parser.add_argument('--model_name', type=str, help='Name under which to save the model')
    args = parser.parse_args()

    train(args.data, args.model_name)

if __name__ == "__main__":
    main()
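
# Example invocation (illustrative; the dataset path and model name below are
# placeholders, the data file being a ';'-separated text file with the target
# in column 0):
#   python train_model_keras.py --data data/dataset.csv --model_name my_model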