Browse Source

- add model backups for LSTM
- Give more information to the user when loading data

Jérôme BUISINE, 3 years ago
Parent commit: 70f5d74832
4 changed files, 190 additions and 107 deletions
  1. cnn_models.py (+16, -33)
  2. generate/generate_dataset.py (+1, -1)
  3. train_lstm_weighted.py (+85, -11)
  4. train_model.py (+88, -62)

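The first item above ("model backups for LSTM") boils down to checkpointing the full model to disk at every epoch and, on the next run, reloading the most recent .h5 snapshot and resuming from its epoch number. Below is a minimal, self-contained sketch of that pattern as train_lstm_weighted.py and train_model.py now apply it; the toy data, the demo_model name and the backups/demo_model folder are illustrative assumptions, not code from this repository.

    import os
    import numpy as np
    from keras.models import Sequential, load_model
    from keras.layers import Dense
    from keras.callbacks import ModelCheckpoint

    # toy data, only here to make the sketch runnable
    X = np.random.rand(64, 10)
    y = (X.sum(axis=1) > 5).astype(int)

    # hypothetical backup folder (the scripts use cfg.backup_model_folder and p_output)
    backup_folder = 'backups/demo_model'
    if not os.path.exists(backup_folder):
        os.makedirs(backup_folder)

    # one .h5 snapshot per epoch, with the epoch number encoded in the filename
    filepath = os.path.join(backup_folder, 'demo_model-_{epoch:03d}.h5')
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=0, mode='max')

    backups = sorted(os.listdir(backup_folder))

    if len(backups) > 0:
        # resume from the most recent snapshot and recover the epoch counter
        last_backup = backups[-1]
        model = load_model(os.path.join(backup_folder, last_backup))
        initial_epoch = int(last_backup.split('_')[-1].replace('.h5', ''))
    else:
        # no backup yet: build and compile a fresh (toy) model
        model = Sequential([Dense(8, activation='relu', input_shape=(10,)),
                            Dense(1, activation='sigmoid')])
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        initial_epoch = 0

    model.fit(X, y, validation_split=0.3,
              initial_epoch=initial_epoch, epochs=5,
              callbacks=[checkpoint], verbose=1)

Because ModelCheckpoint is created without save_best_only, a snapshot is written every epoch, and load_model restores architecture, weights and optimizer state in one call; this is why the _weights_file plumbing and model.load_weights calls in cnn_models.py could be dropped.
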
+ 16 - 33
cnn_models.py

@@ -17,19 +17,19 @@ import custom_config as cfg
 #from models import metrics
 
 
-def generate_model_2D(_input_shape, _weights_file=None):
+def generate_model_2D(_input_shape):
 
     model = Sequential()
 
-    model.add(Conv2D(60, (2, 2), input_shape=_input_shape))
+    model.add(Conv2D(140, (3, 3), input_shape=_input_shape))
     model.add(Activation('relu'))
     model.add(MaxPooling2D(pool_size=(2, 2)))
 
-    model.add(Conv2D(40, (2, 2)))
+    model.add(Conv2D(70, (3, 3)))
     model.add(Activation('relu'))
     model.add(MaxPooling2D(pool_size=(2, 2)))
 
-    model.add(Conv2D(20, (2, 2)))
+    model.add(Conv2D(20, (3, 3)))
     model.add(Activation('relu'))
     model.add(MaxPooling2D(pool_size=(2, 2)))
 
@@ -63,10 +63,6 @@ def generate_model_2D(_input_shape, _weights_file=None):
     model.add(Dense(2))
     model.add(Activation('softmax'))
 
-    # reload weights if exists
-    if _weights_file is not None:
-        model.load_weights(_weights_file)
-
     model.compile(loss='categorical_crossentropy',
                   optimizer='adam',
                   #metrics=['accuracy', metrics.auc])
@@ -75,42 +71,37 @@ def generate_model_2D(_input_shape, _weights_file=None):
     return model
 
 
-def generate_model_3D(_input_shape, _weights_file=None):
+def generate_model_3D(_input_shape):
 
     model = Sequential()
 
     print(_input_shape)
 
-    model.add(Conv3D(60, (1, 2, 2), input_shape=_input_shape))
+    model.add(Conv3D(200, (1, 3, 3), input_shape=_input_shape))
     model.add(Activation('relu'))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
 
-    model.add(Conv3D(40, (1, 2, 2)))
+    model.add(Conv3D(100, (1, 3, 3)))
     model.add(Activation('relu'))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
 
-    model.add(Conv3D(20, (1, 2, 2)))
+    model.add(Conv3D(40, (1, 3, 3)))
     model.add(Activation('relu'))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
 
     model.add(Flatten())
 
-    model.add(Dense(140))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.5))
-
-    model.add(Dense(120))
+    model.add(Dense(256))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
 
-    model.add(Dense(80))
+    model.add(Dense(128))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
 
-    model.add(Dense(40))
+    model.add(Dense(64))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
@@ -123,10 +114,6 @@ def generate_model_3D(_input_shape, _weights_file=None):
     model.add(Dense(2))
     model.add(Activation('sigmoid'))
 
-    # reload weights if exists
-    if _weights_file is not None:
-        model.load_weights(_weights_file)
-
     model.compile(loss='categorical_crossentropy',
                   optimizer='rmsprop',
                   #metrics=['accuracy', metrics.auc])
@@ -136,7 +123,7 @@ def generate_model_3D(_input_shape, _weights_file=None):
 
 
 # using transfer learning (VGG19)
-def generate_model_3D_TL(_input_shape, _weights_file=None):
+def generate_model_3D_TL(_input_shape):
 
     # load pre-trained model
     model = VGG19(weights='imagenet', include_top=False, input_shape=_input_shape)
@@ -199,10 +186,6 @@ def generate_model_3D_TL(_input_shape, _weights_file=None):
 
     model_final.summary()
 
-    # reload weights if exists
-    if _weights_file is not None:
-        model.load_weights(_weights_file)
-
     model_final.compile(loss='binary_crossentropy',
                   optimizer='rmsprop',
                 #   metrics=['accuracy', metrics.auc])
@@ -211,16 +194,16 @@ def generate_model_3D_TL(_input_shape, _weights_file=None):
     return model_final
 
 
-def get_model(n_channels, _input_shape, _tl=False, _weights_file=None):
+def get_model(n_channels, _input_shape, _tl=False):
     
     if _tl:
         if n_channels == 3:
-            return generate_model_3D_TL(_input_shape, _weights_file)
+            return generate_model_3D_TL(_input_shape)
         else:
             print("Can't use transfer learning with only 1 channel")
 
     if n_channels == 1:
-        return generate_model_2D(_input_shape, _weights_file)
+        return generate_model_2D(_input_shape)
 
     if n_channels >= 2:
-        return generate_model_3D(_input_shape, _weights_file)

+ 1 - 1
generate/generate_dataset.py

@@ -42,7 +42,7 @@ generic_output_file_svd = '_random.csv'
 def generate_data_model(_filename, _transformations, _scenes_list, _nb_zones = 4, _random=0):
 
     output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".val"
+    output_test_filename = _filename + ".test"
 
     if not '/' in output_train_filename:
         raise Exception("Please select filename with directory path to save data. Example : data/dataset")

+ 85 - 11
train_lstm_weighted.py

@@ -1,5 +1,5 @@
 # main imports
-import argparse
+import argparse, sys
 import numpy as np
 import pandas as pd
 import os
@@ -14,6 +14,8 @@ from ipfml import utils
 from keras.layers import Dense, Dropout, LSTM, Embedding, GRU, BatchNormalization, ConvLSTM2D, Conv3D, Flatten
 from keras.preprocessing.sequence import pad_sequences
 from keras.models import Sequential
+from keras.models import load_model
+from keras.callbacks import ModelCheckpoint
 from sklearn.metrics import roc_auc_score, accuracy_score
 import tensorflow as tf
 from keras import backend as K
@@ -23,6 +25,30 @@ from joblib import dump
 
 import custom_config as cfg
 
+# global variables
+n_counter = 0
+total_samples = 0
+
+def write_progress(progress):
+    '''
+    Display progress information as progress bar
+    '''
+    barWidth = 180
+
+    output_str = "["
+    pos = barWidth * progress
+    for i in range(barWidth):
+        if i < pos:
+           output_str = output_str + "="
+        elif i == pos:
+           output_str = output_str + ">"
+        else:
+            output_str = output_str + " "
+
+    output_str = output_str + "] " + str(int(progress * 100.0)) + " %\r"
+    print(output_str)
+    sys.stdout.write("\033[F")
+
 
 def build_input(df, seq_norm):
     """Convert dataframe to numpy array input with timesteps as float array
@@ -35,6 +61,8 @@ def build_input(df, seq_norm):
         {np.ndarray} -- input LSTM data as numpy array
     """
 
+    global n_counter
+    global total_samples
     arr = []
 
     # for each input line
@@ -58,6 +86,10 @@ def build_input(df, seq_norm):
             
         arr.append(seq_arr)
 
+        # update progress
+        n_counter += 1
+        write_progress(n_counter / float(total_samples))
+
     arr = np.array(arr)
     print(arr.shape)
 
@@ -129,16 +161,16 @@ def create_model(_input_shape):
     model.add(Dense(1, activation='sigmoid'))
     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
 
-    print ('Compiling...')
-    # model.compile(loss='binary_crossentropy',
-    #               optimizer='rmsprop',
-    #               metrics=['accuracy'])
+    print ('-- Compiling...')
 
     return model
 
 
 def main():
 
+    # get this variable as global
+    global total_samples
+
     parser = argparse.ArgumentParser(description="Read and compute training of LSTM model")
 
     parser.add_argument('--train', type=str, help='input train dataset', required=True)
@@ -157,9 +189,14 @@ def main():
     p_batch_size   = args.batch_size
     p_seq_norm     = bool(args.seq_norm)
 
+    print('-----------------------------')
+    print("----- Preparing data... -----")
     dataset_train = pd.read_csv(p_train, header=None, sep=';')
     dataset_test = pd.read_csv(p_test, header=None, sep=';')
 
+    print("-- Train set size : ", len(dataset_train))
+    print("-- Test set size : ", len(dataset_test))
+
     # getting weighted class over the whole dataset
     noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
     not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
@@ -176,9 +213,12 @@ def main():
 
     total_samples = noisy_samples + not_noisy_samples
 
-    print('noisy', noisy_samples)
-    print('not_noisy', not_noisy_samples)
-    print('total', total_samples)
+    print('-----------------------------')
+    print('---- Dataset information ----')
+    print('-- noisy:', noisy_samples)
+    print('-- not_noisy:', not_noisy_samples)
+    print('-- total:', total_samples)
+    print('-----------------------------')
 
     class_weight = {
         0: noisy_samples / float(total_samples),
@@ -189,6 +229,9 @@ def main():
     final_df_train = sklearn.utils.shuffle(dataset_train)
     final_df_test = sklearn.utils.shuffle(dataset_test)
 
+    print('---- Loading dataset.... ----')
+    print('-----------------------------\n')
+
     # split dataset into X_train, y_train, X_test, y_test
     X_train_all = final_df_train.loc[:, 1:].apply(lambda x: x.astype(str).str.split('::'))
     X_train_all = build_input(X_train_all, p_seq_norm)
@@ -199,14 +242,45 @@ def main():
     y_test = final_df_test.loc[:, 0].astype('int')
 
     input_shape = (X_train_all.shape[1], X_train_all.shape[2], X_train_all.shape[3], X_train_all.shape[4])
-    print('Training data input shape', input_shape)
-    model = create_model(input_shape)
+    
+    
+    print('\n-----------------------------')
+    print('-- Training data input shape', input_shape)
+    print('-----------------------------')
+
+    # create backup folder for current model
+    model_backup_folder = os.path.join(cfg.backup_model_folder, p_output)
+    if not os.path.exists(model_backup_folder):
+        os.makedirs(model_backup_folder)
+
+    # add of callback models
+    filepath = os.path.join(cfg.backup_model_folder, p_output, p_output + "-_{epoch:03d}.h5")
+    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=0, mode='max')
+    callbacks_list = [checkpoint]
+
+    
+    # check if backup already exists
+    backups = sorted(os.listdir(model_backup_folder))
+
+    if len(backups) > 0:
+        last_backup_file = backups[-1]
+        model = load_model(last_backup_file)
+
+        # get initial epoch
+        initial_epoch = int(last_backup_file.split('_')[-1].replace('.h5', ''))
+        print('-----------------------------')  
+        print('-- Restore model from backup...')
+        print('-- Restart training @epoch:', initial_epoch)
+        print('-----------------------------')
+    else:
+        model = create_model(input_shape)
     model.summary()
 
     # prepare train and validation dataset
     X_train, X_val, y_train, y_val = train_test_split(X_train_all, y_train_all, test_size=0.3, shuffle=False)
 
-    print("Fitting model with custom class_weight", class_weight)
+    print("-- Fitting model with custom class_weight", class_weight)
+    print('-----------------------------')
     history = model.fit(X_train, y_train, batch_size=p_batch_size, epochs=p_epochs, validation_data=(X_val, y_val), verbose=1, shuffle=True, class_weight=class_weight)
 
     # list all data in history

+ 88 - 62
train_model.py

@@ -8,6 +8,7 @@ import json
 import cnn_models as models
 import tensorflow as tf
 import keras
+from keras.models import load_model
 from keras import backend as K
 from keras.callbacks import ModelCheckpoint
 from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
@@ -23,12 +24,34 @@ sys.path.insert(0, '') # trick to enable import of main folder module
 
 import custom_config as cfg
 
+# counter param
+n_counter = 0
+
+def write_progress(progress):
+    '''
+    Display progress information as progress bar
+    '''
+    barWidth = 180
+
+    output_str = "["
+    pos = barWidth * progress
+    for i in range(barWidth):
+        if i < pos:
+           output_str = output_str + "="
+        elif i == pos:
+           output_str = output_str + ">"
+        else:
+            output_str = output_str + " "
+
+    output_str = output_str + "] " + str(int(progress * 100.0)) + " %\r"
+    print(output_str)
+    sys.stdout.write("\033[F")
 
 def main():
 
     parser = argparse.ArgumentParser(description="Train Keras model and save it into .json file")
 
-    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .val)', required=True)
+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)', required=True)
     parser.add_argument('--output', type=str, help='output file name desired for model (without .json extension)', required=True)
     parser.add_argument('--tl', type=int, help='use or not of transfer learning (`VGG network`)', default=0, choices=[0, 1])
     parser.add_argument('--batch_size', type=int, help='batch size used as model input', default=64)
@@ -55,18 +78,20 @@ def main():
     ########################
     # 1. Get and prepare data
     ########################
-    print("Preparing data...")
+    print('-----------------------------')
+    print("----- Preparing data... -----")
     dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
     dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
 
-    print("Train set size : ", len(dataset_train))
-    print("Test set size : ", len(dataset_test))
+    print("-- Train set size : ", len(dataset_train))
+    print("-- Test set size : ", len(dataset_test))
 
     # default first shuffle of data
     dataset_train = shuffle(dataset_train)
     dataset_test = shuffle(dataset_test)
 
-    print("Reading all images data...")
+    print('-----------------------------')
+    print("--Reading all images data...")
 
     # getting number of chanel
     if p_chanels == 0:
@@ -74,7 +99,7 @@ def main():
     else:
         n_chanels = p_chanels
 
-    print("Number of chanels : ", n_chanels)
+    print("-- Number of chanels : ", n_chanels)
     img_width, img_height = [ int(s) for s in p_size ]
 
     # specify the number of dimensions
@@ -106,32 +131,58 @@ def main():
 
     total_samples = noisy_samples + not_noisy_samples
 
-    print('noisy', noisy_samples)
-    print('not_noisy', not_noisy_samples)
-    print('total', total_samples)
+    print('-----------------------------')
+    print('---- Dataset information ----')
+    print('-- noisy:', noisy_samples)
+    print('-- not_noisy:', not_noisy_samples)
+    print('-- total:', total_samples)
+    print('-----------------------------')
 
     class_weight = {
         0: (noisy_samples / float(total_samples)),
         1: (not_noisy_samples / float(total_samples)),
     }
 
-
-
     final_df_train = dataset_train
     final_df_test = dataset_test
+    
+    def load_multiple_greyscale(x):
+        # update progress
+        global n_counter
+        n_counter += 1
+        write_progress(n_counter / float(total_samples))
+        return [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')]
+
+    def load_greyscale(x):
+        # update progress
+        global n_counter
+        n_counter += 1
+        write_progress(n_counter / float(total_samples))
+        return cv2.imread(x, cv2.IMREAD_GRAYSCALE)
+
+    def load_rgb(x):
+        # update progress
+        global n_counter
+        n_counter += 1
+        write_progress(n_counter / float(total_samples))
+        return cv2.imread(x)
+
+
+    print('---- Loading dataset.... ----')
+    print('-----------------------------\n')
 
     # check if specific number of chanels is used
     if p_chanels == 0:
         # `::` is the separator used for getting each img path
         if n_chanels > 1:
-            final_df_train[1] = final_df_train[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
-            final_df_test[1] = final_df_test[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
+            final_df_train[1] = final_df_train[1].apply(lambda x: load_multiple_greyscale(x))
+            final_df_test[1] = final_df_test[1].apply(lambda x: load_multiple_greyscale(x))
         else:
-            final_df_train[1] = final_df_train[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
-            final_df_test[1] = final_df_test[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
+            final_df_train[1] = final_df_train[1].apply(lambda x: load_greyscale(x))
+            final_df_test[1] = final_df_test[1].apply(lambda x: load_greyscale(x))
     else:
-        final_df_train[1] = final_df_train[1].apply(lambda x: cv2.imread(x))
-        final_df_test[1] = final_df_test[1].apply(lambda x: cv2.imread(x))
+        final_df_train[1] = final_df_train[1].apply(lambda x: load_rgb(x))
+        final_df_test[1] = final_df_test[1].apply(lambda x: load_rgb(x))
 
     # reshape array data
     final_df_train[1] = final_df_train[1].apply(lambda x: np.array(x).reshape(input_shape))
@@ -141,12 +192,9 @@ def main():
     final_df_train = shuffle(final_df_train)
     final_df_test = shuffle(final_df_test)
 
-    final_df_train_size = len(final_df_train.index)
-    final_df_test_size = len(final_df_test.index)
-
-    print("----------------------------------------------------------")
+    print('\n-----------------------------')
     print("Validation split is now set at", p_val_size)
-    print("----------------------------------------------------------")
+    print('-----------------------------')
 
     # use of the whole data set for training
     x_dataset_train = final_df_train.iloc[:,1:]
@@ -169,10 +217,6 @@ def main():
 
     x_data_test = np.array(x_data_test)
 
-    print("End of loading data..")
-
-    print("Train set size (after balancing) : ", final_df_train_size)
-    print("Test set size (after balancing) : ", final_df_test_size)
 
     #######################
     # 2. Getting model
@@ -184,44 +228,27 @@ def main():
         os.makedirs(model_backup_folder)
 
     # add of callback models
-    filepath = os.path.join(cfg.backup_model_folder, p_output, p_output + "-{accuracy:02f}-{val_accuracy:02f}__{epoch:02d}.hdf5")
-    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
+    filepath = os.path.join(cfg.backup_model_folder, p_output, p_output + "-_{epoch:03d}.h5")
+    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=0, mode='max')
     callbacks_list = [checkpoint]
 
     
     # check if backup already exists
-    weights_filepath = None
     backups = sorted(os.listdir(model_backup_folder))
 
     if len(backups) > 0:
-
-        # retrieve last backup epoch of model 
-        last_model_backup = None
-        max_last_epoch = 0
-
-        for backup in backups:
-
-            last_epoch = int(backup.split('__')[1].replace('.h5', ''))
-
-            if last_epoch > max_last_epoch and last_epoch < p_epochs:
-                max_last_epoch = last_epoch
-                last_model_backup = backup
-
-        if last_model_backup is None:
-            print("Epochs asked is already computer. Noee")
-            sys.exit(1)
-
-        initial_epoch = max_last_epoch
-        print("-------------------------------------------------")
-        print("Previous backup model found",  last_model_backup, "with already", initial_epoch, " epoch(s) done...")
-        print("Resuming from epoch", str(initial_epoch + 1))
-        print("-------------------------------------------------")
-
-        # load weights
-        weights_filepath = os.path.join(model_backup_folder, last_model_backup)
-
-    print(n_chanels)
-    model = models.get_model(n_chanels, input_shape, p_tl, weights_filepath)
+        last_backup_file = backups[-1]
+        model = load_model(last_backup_file)
+
+        # get initial epoch
+        initial_epoch = int(last_backup_file.split('_')[-1].replace('.h5', ''))
+        print('-----------------------------')  
+        print('-- Restore model from backup...')
+        print('-- Restart training @epoch:', initial_epoch)
+        print('-----------------------------')
+    else:
+        model = models.get_model(n_chanels, input_shape, p_tl)
+        
     model.summary()
 
     # prepare train and validation dataset
@@ -231,7 +258,9 @@ def main():
     y_val = to_categorical(y_val)
     y_test = to_categorical(y_dataset_test)
 
-    print("Fitting model with custom class_weight", class_weight)
+    print('-----------------------------')
+    print("-- Fitting model with custom class_weight", class_weight)
+    print('-----------------------------')
     model.fit(X_train, y_train, 
         validation_data=(X_val, y_val), 
         initial_epoch=initial_epoch, 
@@ -247,7 +276,7 @@ def main():
     if not os.path.exists(cfg.output_models):
         os.makedirs(cfg.output_models)
 
-    # save the model into HDF5 file
+    # save the model into H5 file
     model_output_path = os.path.join(cfg.output_models, p_output + '.h5')
     model.save(model_output_path)
 
@@ -256,9 +285,6 @@ def main():
     y_val_prediction = model.predict(X_val)
     y_test_prediction = model.predict(x_dataset_test)
 
-    # y_train_prediction = [1 if x > 0.5 else 0 for x in y_train_prediction]
-    # y_val_prediction = [1 if x > 0.5 else 0 for x in y_val_prediction]
-
     y_train_prediction = np.argmax(y_train_prediction, axis=1)
     y_val_prediction = np.argmax(y_val_prediction, axis=1)