train_model_keras.py

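"""Train a Keras feed-forward regression model and save it to disk.

The dataset is a semicolon-separated text file whose first column is the
target and whose remaining columns are the features. Example invocation
(paths are illustrative only):

    python train_model_keras.py --data datasets/train.csv --model_name my_model
"""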
import os
import argparse

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout

import modules.config as cfg


def train(_data_file, _model_name):
    # Load the dataset: column 0 is the target, the remaining columns are features
    dataset = np.loadtxt(_data_file, delimiter=';')
    y = dataset[:, 0]
    X = dataset[:, 1:]
    _, nb_elem = X.shape
    y = np.reshape(y, (-1, 1))

    # Scale features and target with separate scalers: fitting a single scaler
    # on X and then on y would keep only the last fit
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    xscale = x_scaler.fit_transform(X)
    yscale = y_scaler.fit_transform(y)

    # Default split: 75% train / 25% test
    X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)

    # Define the Keras NN structure: four ReLU hidden layers with dropout and
    # a single linear output for regression
    model = Sequential()
    model.add(Dense(200, input_dim=nb_elem, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='linear'))
    model.summary()

    # Set expected metrics
    # TODO: add the coefficient of determination as a metric, or always use MSE/MAE?
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    history = model.fit(X_train, y_train, epochs=50, batch_size=50, verbose=1, validation_split=0.2)
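
    # Sketch: evaluate on the held-out split (X_test / y_test are otherwise
    # unused); model.evaluate returns [loss, mse, mae], matching the metrics
    # passed to compile() above
    test_scores = model.evaluate(X_test, y_test, verbose=0)
    print("Test scores (loss, mse, mae):", test_scores)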

    # Save the model architecture as JSON and the weights as HDF5
    if not os.path.exists(cfg.saved_models_folder):
        os.makedirs(cfg.saved_models_folder)
    model_output_path = os.path.join(cfg.saved_models_folder, _model_name + '.json')
    # model.to_json() already returns a serialized JSON string, so write it
    # directly rather than re-encoding it with json.dump()
    with open(model_output_path, 'w') as f:
        f.write(model.to_json())
    print("Model saved into", model_output_path)
    model.save_weights(model_output_path.replace('.json', '.h5'))

    # Save the score into the global_result.csv file
    # with open(cfg.global_result_filepath, "a") as f:
    #     f.write(_model_name + ';' + str(len(y)) + ';' + str(coeff[0]) + ';\n')

    # Plot the training/validation loss curves and save the figure next to the model
    plt.figure(figsize=(30, 22))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss', fontsize=20)
    plt.ylabel('loss', fontsize=16)
    plt.xlabel('epoch', fontsize=16)
    plt.legend(['train', 'validation'], loc='upper left', fontsize=16)
    plt.savefig(model_output_path.replace('.json', '.png'))
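
    # Note: model.predict() outputs live in the scaled target space; apply
    # y_scaler.inverse_transform(...) to map predictions back to original units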


def main():
    parser = argparse.ArgumentParser(description="Train a model and save it")
    parser.add_argument('--data', type=str, help='Filename of the dataset')
    parser.add_argument('--model_name', type=str, help='Name under which to save the model')
    args = parser.parse_args()
    train(args.data, args.model_name)


if __name__ == "__main__":
    main()