train_model_keras.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. # main imports
  2. import os, sys, argparse
  3. import numpy as np
  4. import json
  5. import matplotlib.pyplot as plt
  6. # model imports
  7. from joblib import dump
  8. import tensorflow as tf
  9. from sklearn.model_selection import train_test_split
  10. from sklearn.model_selection import cross_val_score
  11. from sklearn.model_selection import KFold
  12. from sklearn.pipeline import Pipeline
  13. from sklearn.preprocessing import MinMaxScaler
  14. from keras.models import Sequential
  15. from keras.layers import Dense, Dropout
  16. from keras.wrappers.scikit_learn import KerasRegressor
  17. from keras import backend as K
  18. # modules and config imports
  19. sys.path.insert(0, '') # trick to enable import of main folder module
  20. import custom_config as cfg
  21. def train(_data_file, _model_name):
  22. # get length of data
  23. dataset=np.loadtxt(_data_file, delimiter=';')
  24. y = dataset[:,0]
  25. X = dataset[:,1:]
  26. _, nb_elem = X.shape
  27. y=np.reshape(y, (-1,1))
  28. scaler = MinMaxScaler()
  29. scaler.fit(X)
  30. scaler.fit(y)
  31. xscale=scaler.transform(X)
  32. yscale=scaler.transform(y)
  33. X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)
  34. # define keras NN structure
  35. model = Sequential()
  36. model.add(Dense(200, input_dim=nb_elem, kernel_initializer='normal', activation='relu'))
  37. model.add(Dropout(0.2))
  38. model.add(Dense(100, activation='relu'))
  39. model.add(Dropout(0.2))
  40. model.add(Dense(50, activation='relu'))
  41. model.add(Dropout(0.2))
  42. model.add(Dense(10, activation='relu'))
  43. model.add(Dropout(0.2))
  44. model.add(Dense(1, activation='linear'))
  45. model.summary()
  46. # Set expected metrics
  47. # TODO : add coefficients of determination as metric ? Or always use MSE/MAE
  48. model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
  49. history = model.fit(X_train, y_train, epochs=cfg.keras_epochs, batch_size=50, verbose=1, validation_split=0.2)
  50. # save the model into json/HDF5 file
  51. if not os.path.exists(cfg.saved_models_folder):
  52. os.makedirs(cfg.saved_models_folder)
  53. model_output_path = os.path.join(cfg.saved_models_folder, _model_name + '.json')
  54. json_model_content = model.to_json()
  55. with open(model_output_path, 'w') as f:
  56. print("Model saved into ", model_output_path)
  57. json.dump(json_model_content, f, indent=4)
  58. model.save_weights(model_output_path.replace('.json', '.h5'))
  59. # save score into global_result.csv file
  60. # if not os.path.exists(cfg.results_information_folder):
  61. # os.makedirs(cfg.results_information_folder)
  62. #
  63. # with open(cfg.global_result_filepath, "a") as f:
  64. # f.write(_model_name + ';' + str(len(y)) + ';' + str(coeff[0]) + ';\n')
  65. # Save plot info using model name
  66. plt.figure(figsize=(30, 22))
  67. plt.plot(history.history['loss'])
  68. plt.plot(history.history['val_loss'])
  69. plt.title('model loss', fontsize=20)
  70. plt.ylabel('loss', fontsize=16)
  71. plt.xlabel('epoch', fontsize=16)
  72. plt.legend(['train', 'validation'], loc='upper left', fontsize=16)
  73. plt.savefig(model_output_path.replace('.json', '.png'))
  74. def main():
  75. parser = argparse.ArgumentParser(description="Train model and saved it")
  76. parser.add_argument('--data', type=str, help='Filename of dataset')
  77. parser.add_argument('--model_name', type=str, help='Saved model name')
  78. args = parser.parse_args()
  79. param_data_file = args.data
  80. param_model = args.model_name
  81. train(param_data_file, param_model)
  82. if __name__== "__main__":
  83. main()