Browse Source

Update of modules and training data

Jérôme BUISINE 1 year ago
parent
commit
f923d55c3d
5 changed files with 81 additions and 84 deletions
  1. 21 8
      cnn_models.py
  2. 0 40
      generate_symlinks.sh
  3. 1 1
      modules
  4. 3 3
      prediction_model.py
  5. 56 32
      train_model.py

+ 21 - 8
cnn_models.py

@@ -16,7 +16,8 @@ sys.path.insert(0, '') # trick to enable import of main folder module
 import custom_config as cfg
 from models import metrics
 
-def generate_model_2D(_input_shape):
+
+def generate_model_2D(_input_shape, _weights_file=None):
 
     model = Sequential()
 
@@ -62,6 +63,10 @@ def generate_model_2D(_input_shape):
     model.add(Dense(1))
     model.add(Activation('sigmoid'))
 
+    # reload model weights if a backup file was provided
+    if _weights_file is not None:
+        model.load_weights(_weights_file)
+
     model.compile(loss='binary_crossentropy',
                   optimizer='rmsprop',
                   metrics=['accuracy', metrics.auc])
@@ -69,7 +74,7 @@ def generate_model_2D(_input_shape):
     return model
 
 
-def generate_model_3D(_input_shape):
+def generate_model_3D(_input_shape, _weights_file=None):
 
     model = Sequential()
 
@@ -117,6 +122,10 @@ def generate_model_3D(_input_shape):
     model.add(Dense(1))
     model.add(Activation('sigmoid'))
 
+    # reload model weights if a backup file was provided
+    if _weights_file is not None:
+        model.load_weights(_weights_file)
+
     model.compile(loss='binary_crossentropy',
                   optimizer='rmsprop',
                   metrics=['accuracy', metrics.auc])
@@ -125,7 +134,7 @@ def generate_model_3D(_input_shape):
 
 
 # using transfer learning (VGG19)
-def generate_model_3D_TL(_input_shape):
+def generate_model_3D_TL(_input_shape, _weights_file=None):
 
     # load pre-trained model
     model = VGG19(weights='imagenet', include_top=False, input_shape=_input_shape)
@@ -188,6 +197,10 @@ def generate_model_3D_TL(_input_shape):
 
     model_final.summary()
 
+    # reload model weights if a backup file was provided
+    if _weights_file is not None:
+        model.load_weights(_weights_file)
+
     model_final.compile(loss='binary_crossentropy',
                   optimizer='rmsprop',
                   metrics=['accuracy', metrics.auc])
@@ -195,16 +208,16 @@ def generate_model_3D_TL(_input_shape):
     return model_final
 
 
-def get_model(n_channels, _input_shape, tl=False):
+def get_model(n_channels, _input_shape, _tl=False, _weights_file=None):
     
-    if tl:
+    if _tl:
         if n_channels == 3:
-            return generate_model_3D_TL(_input_shape)
+            return generate_model_3D_TL(_input_shape, _weights_file)
         else:
             print("Can't use transfer learning with only 1 channel")
 
     if n_channels == 1:
-        return generate_model_2D(_input_shape)
+        return generate_model_2D(_input_shape, _weights_file)
 
     if n_channels == 3:
-        return generate_model_3D(_input_shape)
+        return generate_model_3D(_input_shape, _weights_file)

+ 0 - 40
generate_symlinks.sh

@@ -1,40 +0,0 @@
-#! /bin/bash
-
-
-if [ -z "$1" ]
-  then
-    echo "No argument supplied"
-    echo "Need to specify orval you want to use (in /scratch folder)"
-    exit 1
-fi
-
-if [ -z "$2" ]
-  then
-    echo "No argument supplied"
-    echo "Need to specify where you want to store data"
-    exit 1
-fi
-
-
-echo "Creating links into /scratch folder"
-
-scratch="/scratch"
-orval=$1
-path=$2
-
-
-for link in {"data","results","saved_models","models_info","models_backup","threshold_map","learned_zones","custom_norm"}; do
-    
-    if [ -L ${link} ]; then
-        rm ${link}
-    fi
-    
-    fullpath=${scratch}/${orval}/${path}/${link}
-
-    if [ ! -d "${fullpath}" ]; then
-        mkdir -p ${fullpath}
-    fi
-    
-    # remove `orval` name for running part
-    ln -s ${scratch}/${path}/${link} ${link}
-done

+ 1 - 1
modules

@@ -1 +1 @@
-Subproject commit 283a56becc8ca9103ac3d9f1cdaa6f9daa2dcb5c
+Subproject commit 5f7d01f3c5eaca0abf3950663ed7e8db9f92634b

+ 3 - 3
prediction_model.py

@@ -77,8 +77,8 @@ def main():
     dataset[1] = dataset[1].apply(lambda x: np.array(x).reshape(input_shape))
 
     # use of the whole data set for training
-    x_dataset = dataset.ix[:,1:]
-    y_dataset = dataset.ix[:,0]
+    x_dataset = dataset.iloc[:,1:]
+    y_dataset = dataset.iloc[:,0]
 
     x_data = []
     for item in x_dataset.values:
@@ -125,7 +125,7 @@ def main():
 
     # add information into file
     with open(perf_file_path, 'a') as f:
-        line = p_data_file + ';' + p_model_file + ';' + str(acc_score) + ';' + str(f1_data_score) + ';' + str(recall_data_score) + ';' + str(pres_score) + ';' + str(roc_score)
+        line = p_data_file + ';' + str(len(dataset)) + ';' + p_model_file + ';' + str(acc_score) + ';' + str(f1_data_score) + ';' + str(recall_data_score) + ';' + str(pres_score) + ';' + str(roc_score) + ';\n'
         f.write(line)
 
 if __name__== "__main__":

+ 56 - 32
train_model.py

@@ -31,6 +31,7 @@ def main():
     parser.add_argument('--tl', type=int, help='use or not of transfer learning (`VGG network`)', default=0, choices=[0, 1])
     parser.add_argument('--batch_size', type=int, help='batch size used as model input', default=cfg.keras_batch)
     parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=cfg.keras_epochs)
+    parser.add_argument('--balancing', type=int, help='specify if balancing of classes is done or not', default=1)
     #parser.add_argument('--val_size', type=float, help='percent of validation data during training process', default=cfg.val_dataset_size)
 
 
@@ -41,6 +42,8 @@ def main():
     p_tl          = args.tl
     p_batch_size  = args.batch_size
     p_epochs      = args.epochs
+    p_balancing   = bool(args.balancing)
+
     #p_val_size    = args.val_size
     initial_epoch = 0
         
@@ -79,29 +82,35 @@ def main():
         else:
             input_shape = (img_width, img_height, n_channels)
 
-    # `:` is the separator used for getting each img path
-    if n_channels > 1:
-        dataset_train[1] = dataset_train[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
-        dataset_val[1] = dataset_val[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
-    else:
-        dataset_train[1] = dataset_train[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
-        dataset_val[1] = dataset_val[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
+    # get dataset with an equal number of class occurrences, if desired
+    if p_balancing:
+        print("Balancing of data")
+        noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
+        not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
+        nb_noisy_train = len(noisy_df_train.index)
 
-    # reshape array data
-    dataset_train[1] = dataset_train[1].apply(lambda x: np.array(x).reshape(input_shape))
-    dataset_val[1] = dataset_val[1].apply(lambda x: np.array(x).reshape(input_shape))
+        noisy_df_val = dataset_val[dataset_val.iloc[:, 0] == 1]
+        not_noisy_df_val = dataset_val[dataset_val.iloc[:, 0] == 0]
+        nb_noisy_val = len(noisy_df_val.index)
 
-    # get dataset with equal number of classes occurences
-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
-    nb_noisy_train = len(noisy_df_train.index)
+        final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
+        final_df_val = pd.concat([not_noisy_df_val[0:nb_noisy_val], noisy_df_val])
+    else:
+        print("No balancing of data")
+        final_df_train = dataset_train
+        final_df_val = dataset_val
 
-    noisy_df_val = dataset_val[dataset_val.ix[:, 0] == 1]
-    not_noisy_df_val = dataset_val[dataset_val.ix[:, 0] == 0]
-    nb_noisy_val = len(noisy_df_val.index)
+        # `:` is the separator used for getting each img path
+    if n_channels > 1:
+        final_df_train[1] = final_df_train[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
+        final_df_val[1] = final_df_val[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
+    else:
+        final_df_train[1] = final_df_train[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
+        final_df_val[1] = final_df_val[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
 
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_val = pd.concat([not_noisy_df_val[0:nb_noisy_val], noisy_df_val])
+    # reshape array data
+    final_df_train[1] = final_df_train[1].apply(lambda x: np.array(x).reshape(input_shape))
+    final_df_val[1] = final_df_val[1].apply(lambda x: np.array(x).reshape(input_shape))
 
     # shuffle data another time
     final_df_train = shuffle(final_df_train)
@@ -117,11 +126,11 @@ def main():
     print("----------------------------------------------------------")
 
     # use of the whole data set for training
-    x_dataset_train = final_df_train.ix[:,1:]
-    x_dataset_val = final_df_val.ix[:,1:]
+    x_dataset_train = final_df_train.iloc[:,1:]
+    x_dataset_val = final_df_val.iloc[:,1:]
 
-    y_dataset_train = final_df_train.ix[:,0]
-    y_dataset_val = final_df_val.ix[:,0]
+    y_dataset_train = final_df_train.iloc[:,0]
+    y_dataset_val = final_df_val.iloc[:,0]
 
     x_data_train = []
     for item in x_dataset_train.values:
@@ -156,22 +165,37 @@ def main():
     checkpoint = ModelCheckpoint(filepath, monitor='val_auc', verbose=1, save_best_only=True, mode='max')
     callbacks_list = [checkpoint]
 
-    model = models.get_model(n_channels, input_shape, p_tl)
-    model.summary()
-
+    
     # check if backup already exists
+    weights_filepath = None
     backups = sorted(os.listdir(model_backup_folder))
 
     if len(backups) > 0:
-        # TODO : check of initial epoch
-        last_backup = backups[-1]
-        last_epoch = int(last_backup.split('__')[1].replace('.hdf5', ''))
-        initial_epoch = last_epoch
+
+        # retrieve last backup epoch of model 
+        last_model_backup = None
+        max_last_epoch = 0
+
+        for backup in backups:
+
+            last_epoch = int(backup.split('__')[1].replace('.hdf5', ''))
+
+            if last_epoch > max_last_epoch and last_epoch < p_epochs:
+                max_last_epoch = last_epoch
+                last_model_backup = backup
+
+        initial_epoch = max_last_epoch
         print("-------------------------------------------------")
-        print("Previous backup model found with already", last_epoch, "done...")
-        print("Resuming from epoch", str(last_epoch + 1))
+        print("Previous backup model found",  last_model_backup, "with already", initial_epoch, "done...")
+        print("Resuming from epoch", str(initial_epoch + 1))
         print("-------------------------------------------------")
 
+        # load weights
+        weights_filepath = os.path.join(model_backup_folder, last_model_backup)
+
+    model = models.get_model(n_channels, input_shape, p_tl, weights_filepath)
+    model.summary()
+
     # concatenate train and validation data (`validation_split` param will do the separation into keras model)
     y_data = np.concatenate([y_dataset_train.values, y_dataset_val.values])
     x_data = np.concatenate([x_data_train, x_data_val])