import os, argparse
import numpy as np
import matplotlib.pyplot as plt
from joblib import dump
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
import modules.config as cfg
def train(_data_file, _model_name):
    # load dataset: first column is the target, the remaining columns are features
    dataset = np.loadtxt(_data_file, delimiter=';')
    y = dataset[:, 0]
    X = dataset[:, 1:]
    _, nb_elem = X.shape  # nb_elem: number of input features
    y = np.reshape(y, (-1, 1))

    # scale features and target with separate scalers: refitting a single
    # scaler (fit(X) then fit(y)) keeps only the last fit, so X would be
    # transformed with the ranges learned on y
    x_scaler = MinMaxScaler().fit(X)
    y_scaler = MinMaxScaler().fit(y)
    xscale = x_scaler.transform(X)
    yscale = y_scaler.transform(y)

    X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)
    # define keras NN structure
    model = Sequential()
    model.add(Dense(200, input_dim=nb_elem, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='linear'))
    model.summary()

    # set expected metrics
    # TODO: add coefficient of determination as a metric? Or always use MSE/MAE
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    history = model.fit(X_train, y_train, epochs=50, batch_size=50, verbose=1, validation_split=0.2)
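
    # X_test/y_test are created above but never used in the original flow;
    # evaluating on them gives a held-out error estimate (a minimal sketch,
    # not part of the original script)
    test_scores = model.evaluate(X_test, y_test, verbose=0)
    print("Test loss / mse / mae:", test_scores)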
    # save the model architecture into a json file and the weights into HDF5
    if not os.path.exists(cfg.saved_models_folder):
        os.makedirs(cfg.saved_models_folder)
    model_output_path = os.path.join(cfg.saved_models_folder, _model_name + '.json')
    json_model_content = model.to_json()
    with open(model_output_path, 'w') as f:
        print("Model saved into", model_output_path)
        # to_json() already returns a serialized json string, so write it
        # as-is; json.dump would encode it a second time
        f.write(json_model_content)
    model.save_weights(model_output_path.replace('.json', '.h5'))
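
    # joblib's dump is imported above but never used in the original flow;
    # persisting the fitted y scaler lets predictions be mapped back to the
    # original scale after reloading. A sketch: the '_yscaler.joblib' suffix
    # is an assumption, not the project's convention.
    dump(y_scaler, model_output_path.replace('.json', '_yscaler.joblib'))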
    # save score into global_result.csv file
    # with open(cfg.global_result_filepath, "a") as f:
    #     f.write(_model_name + ';' + str(len(y)) + ';' + str(coeff[0]) + ';\n')
    # save the loss curves as a png next to the model files
    plt.figure(figsize=(30, 22))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss', fontsize=20)
    plt.ylabel('loss', fontsize=16)
    plt.xlabel('epoch', fontsize=16)
    plt.legend(['train', 'validation'], loc='upper left', fontsize=16)
    plt.savefig(model_output_path.replace('.json', '.png'))
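
# Example, not part of the original script: reloading the saved model.
# A minimal sketch of a hypothetical helper, assuming the json/HDF5 pair
# written by train() above; model_from_json and load_weights are the
# standard Keras counterparts of to_json() and save_weights().
def load_trained_model(_model_name):
    from keras.models import model_from_json
    model_path = os.path.join(cfg.saved_models_folder, _model_name + '.json')
    with open(model_path) as f:
        model = model_from_json(f.read())
    model.load_weights(model_path.replace('.json', '.h5'))
    # recompile so the reloaded model can be evaluated with the same metrics
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    return model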

def main():
    parser = argparse.ArgumentParser(description="Train a model and save it")
    parser.add_argument('--data', type=str, help='Filename of the dataset')
    parser.add_argument('--model_name', type=str, help='Name under which the model is saved')
    args = parser.parse_args()
    param_data_file = args.data
    param_model = args.model_name
    train(param_data_file, param_model)


if __name__ == "__main__":
    main()
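
# Example invocation (the script filename 'train.py' is an assumption):
#   python train.py --data dataset.csv --model_name my_model
# The dataset is expected to be ';'-separated, with the target in the first column.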