Parcourir la source

3D CNN model added; Add of simulation curves for 2D and 3D models;

Jérôme BUISINE il y a 4 ans
Parent
commit
f4f4555442
11 fichiers modifiés avec 409 ajouts et 377 suppressions
  1. 10 1
      README.md
  2. 118 0
      display_simulation_curves.py
  3. 63 21
      generate_dataset.py
  4. 0 220
      generate_dataset_3D.py
  5. 8 0
      modules/models/metrics.py
  6. 125 0
      modules/models/models.py
  7. 1 1
      modules/utils/config.py
  8. 11 7
      predict_seuil_expe_curve.py
  9. 43 8
      run.sh
  10. 0 34
      run_test.sh
  11. 30 85
      train_model.py

+ 10 - 1
README.md

@@ -33,7 +33,13 @@ List of expected parameter by reconstruction method:
 
 **__Example:__**
 ```bash
-python generate_dataset_3D.py --output data/output_data_filename --metrics "svd_reconstruction, ipca_reconstruction, fast_ica_reconstruction" --renderer "maxwell" --scenes "A, D, G, H" --params "100, 200 :: 50, 10 :: 50" --nb_zones 10 --random 1
+python generate_dataset.py --output data/output_data_filename --metrics "svd_reconstruction, ipca_reconstruction, fast_ica_reconstruction" --renderer "maxwell" --scenes "A, D, G, H" --params "100, 200 :: 50, 10 :: 50" --nb_zones 10 --random 1
+```
+
+
+Then, train model using your custom dataset:
+```bash
+python train_model.py --data data/custom_dataset --output output_model_name
 ```
 
 ## Modules
@@ -41,6 +47,9 @@ python generate_dataset_3D.py --output data/output_data_filename --metrics "svd_
 This project contains modules:
 - **modules/utils/config.py**: *Store all configuration information about the project and dataset information*
 - **modules/utils/data.py**: *Usefull methods used for dataset*
+- **modules/models/metrics.py**: *Useful methods for performance comparisons*
+- **modules/models/models.py**: *Generation of CNN model*
+- **modules/classes/Transformation.py**: *Transformation class to manage computation more easily*
 
 All these modules will be enhanced during development of the project
 

+ 118 - 0
display_simulation_curves.py

@@ -0,0 +1,118 @@
+import numpy as np
+import pandas as pd
+
+import matplotlib.pyplot as plt
+import os, sys, argparse
+
+from modules.utils import config as cfg
+
+learned_zones_folder = cfg.learned_zones_folder
+models_name          = cfg.models_names_list
+label_freq           = 6
+
+def display_curves(folder_path, model_name):
+    """
+    @brief Method used to display simulation given .csv files
+    @param folder_path, folder which contains all .csv files obtained during simulation
+    @param model_name, current name of model
+    @return nothing
+    """
+
+    for name in models_name:
+        if name in model_name:
+            data_filename = model_name
+            learned_zones_folder_path = os.path.join(learned_zones_folder, data_filename)
+
+    data_files = [x for x in os.listdir(folder_path) if '.png' not in x]
+
+    scene_names = [f.split('_')[3] for f in data_files]
+
+    for id, f in enumerate(data_files):
+
+        print(scene_names[id])
+        path_file = os.path.join(folder_path, f)
+
+        scenes_zones_used_file_path = os.path.join(learned_zones_folder_path, scene_names[id] + '.csv')
+
+        zones_used = []
+
+        with open(scenes_zones_used_file_path, 'r') as f:
+            zones_used = [int(x) for x in f.readline().split(';') if x != '']
+
+        print(zones_used)
+
+        df = pd.read_csv(path_file, header=None, sep=";")
+
+        fig=plt.figure(figsize=(35, 22))
+        fig.suptitle("Detection simulation for " + scene_names[id] + " scene", fontsize=20)
+
+        for index, row in df.iterrows():
+
+            row = np.asarray(row)
+
+            threshold = row[2]
+            start_index = row[3]
+            step_value = row[4]
+
+            counter_index = 0
+
+            current_value = start_index
+
+            while(current_value < threshold):
+                counter_index += 1
+                current_value += step_value
+
+            fig.add_subplot(4, 4, (index + 1))
+            plt.plot(row[5:])
+
+            if index in zones_used:
+                ax = plt.gca()
+                ax.set_facecolor((0.9, 0.95, 0.95))
+
+            # draw vertical line from (70,100) to (70, 250)
+            plt.plot([counter_index, counter_index], [-2, 2], 'k-', lw=2, color='red')
+
+            if index % 4 == 0:
+                plt.ylabel('Not noisy / Noisy', fontsize=20)
+
+            if index >= 12:
+                plt.xlabel('Samples per pixel', fontsize=20)
+
+            x_labels = [id * step_value + start_index for id, val in enumerate(row[5:]) if id % label_freq == 0]
+
+            x = [v for v in np.arange(0, len(row[5:])+1) if v % label_freq == 0]
+
+            plt.xticks(x, x_labels, rotation=45)
+            plt.ylim(-1, 2)
+
+        plt.savefig(os.path.join(folder_path, scene_names[id] + '_simulation_curve.png'))
+        #plt.show()
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Display simulations curves from simulation data")
+
+    parser.add_argument('--folder', type=str, help='Folder which contains simulations data for scenes')
+    parser.add_argument('--model', type=str, help='Name of the model used for simulations')
+
+    args = parser.parse_args()
+
+    p_folder = args.folder
+
+    if args.model:
+        p_model = args.model
+    else:
+        # find p_model from folder if model arg not given (folder path need to have model name)
+        if p_folder.split('/')[-1]:
+            p_model = p_folder.split('/')[-1]
+        else:
+            p_model = p_folder.split('/')[-2]
+    
+    print(p_model)
+
+    display_curves(p_folder, p_model)
+
+    print(p_folder)
+
+if __name__== "__main__":
+    main()

+ 63 - 21
generate_dataset.py

@@ -32,7 +32,7 @@ min_max_filename        = cfg.min_max_filename_extension
 scenes_list             = cfg.scenes_names
 scenes_indexes          = cfg.scenes_indices
 choices                 = cfg.normalization_choices
-path                    = cfg.dataset_path
+dataset_path            = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
 
@@ -41,7 +41,7 @@ output_data_folder      = cfg.output_data_folder
 
 generic_output_file_svd = '_random.csv'
 
-def generate_data_model(_scenes_list, _filename, _transformation, _scenes, _nb_zones = 4, _random=0):
+def generate_data_model(_scenes_list, _filename, _transformations, _scenes, _nb_zones = 4, _random=0):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -56,14 +56,14 @@ def generate_data_model(_scenes_list, _filename, _transformation, _scenes, _nb_z
     train_file_data = []
     test_file_data  = []
 
-    scenes = os.listdir(path)
+    scenes = os.listdir(dataset_path)
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(_scenes_list):
 
-        scene_path = os.path.join(path, folder_scene)
+        scene_path = os.path.join(dataset_path, folder_scene)
 
         zones_indices = zones
 
@@ -97,20 +97,52 @@ def generate_data_model(_scenes_list, _filename, _transformation, _scenes, _nb_z
             zone_path = os.path.join(scene_path, current_zone_folder)
 
             # custom path for interval of reconstruction and metric
-            metric_interval_path = os.path.join(zone_path, _transformation.getTranformationPath())
 
-            for label in os.listdir(metric_interval_path):
-                label_path = os.path.join(metric_interval_path, label)
+            metrics_path = []
 
-                images = sorted(os.listdir(label_path))
+            for transformation in _transformations:
+                metric_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
+                metrics_path.append(metric_interval_path)
 
-                for img in images:
-                    img_path = os.path.join(label_path, img)
+            # as labels are same for each metric
+            for label in os.listdir(metrics_path[0]):
+
+                label_metrics_path = []
+
+                for path in metrics_path:
+                    label_path = os.path.join(path, label)
+                    label_metrics_path.append(label_path)
+
+                # getting images list for each metric
+                metrics_images_list = []
+                    
+                for label_path in label_metrics_path:
+                    images = sorted(os.listdir(label_path))
+                    metrics_images_list.append(images)
+
+                # construct each line using all images path of each
+                for index_image in range(0, len(metrics_images_list[0])):
+                    
+                    images_path = []
+
+                    # getting images with same index and hence name for each metric (transformation)
+                    for index_metric in range(0, len(metrics_path)):
+                        img_path = metrics_images_list[index_metric][index_image]
+                        images_path.append(os.path.join(label_metrics_path[index_metric], img_path))
 
                     if label == cfg.noisy_folder:
-                        line = '1;' + img_path + '\n'
+                        line = '1;'
                     else:
-                        line = '0;' + img_path + '\n'
+                        line = '0;'
+
+                    # compute line information with all images paths
+                    for id_path, img_path in enumerate(images_path):
+                        if id_path < len(images_path) - 1:
+                            line = line + img_path + '::'
+                        else:
+                            line = line + img_path
+                    
+                    line = line + '\n'
 
                     if id_zone < _nb_zones and folder_scene in _scenes:
                         train_file_data.append(line)
@@ -137,11 +169,14 @@ def main():
     parser = argparse.ArgumentParser(description="Compute specific dataset for model using of metric")
 
     parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
-    parser.add_argument('--metric', type=str, 
-                                    help="metric choice in order to compute data", 
-                                    choices=metric_choices,
+    parser.add_argument('--metrics', type=str, 
+                                     help="list of metrics choice in order to compute data",
+                                     default='svd_reconstruction, ipca_reconstruction',
+                                     required=True)
+    parser.add_argument('--params', type=str, 
+                                    help="list of specific param for each metric choice (See README.md for further information in 3D mode)", 
+                                    default='100, 200 :: 50, 25',
                                     required=True)
-    parser.add_argument('--param', type=str, help="specific param for metric (See README.md for further information)")
     parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
     parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
     parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
@@ -150,15 +185,22 @@ def main():
     args = parser.parse_args()
 
     p_filename = args.output
-    p_metric   = args.metric
-    p_param    = args.param
+    p_metrics  = list(map(str.strip, args.metrics.split(',')))
+    p_params   = list(map(str.strip, args.params.split('::')))
     p_scenes   = args.scenes.split(',')
     p_nb_zones = args.nb_zones
     p_renderer = args.renderer
     p_random   = args.random
 
-    # create new Transformation obj
-    transformation = Transformation(p_metric, p_param)
+    # create list of Transformation
+    transformations = []
+
+    for id, metric in enumerate(p_metrics):
+
+        if metric not in metric_choices:
+            raise ValueError("Unknown metric, please select a correct metric : ", metric_choices)
+
+        transformations.append(Transformation(metric, p_params[id]))
 
     # list all possibles choices of renderer
     scenes_list = dt.get_renderer_scenes_names(p_renderer)
@@ -172,7 +214,7 @@ def main():
         scenes_selected.append(scenes_list[index])
 
     # create database using img folder (generate first time only)
-    generate_data_model(scenes_list, p_filename, transformation, scenes_selected, p_nb_zones, p_random)
+    generate_data_model(scenes_list, p_filename, transformations, scenes_selected, p_nb_zones, p_random)
 
 if __name__== "__main__":
     main()

+ 0 - 220
generate_dataset_3D.py

@@ -1,220 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Jun 19 11:47:42 2019
-
-@author: jbuisine
-"""
-
-import sys, os, argparse
-import numpy as np
-import random
-import time
-import json
-
-from PIL import Image
-from ipfml import processing, metrics, utils
-from skimage import color
-
-from modules.utils import config as cfg
-from modules.utils import data as dt
-
-from transformation_functions import svd_reconstruction
-from modules.classes.Transformation import Transformation
-
-# getting configuration information
-config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
-learned_folder          = cfg.learned_zones_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indexes          = cfg.scenes_indices
-choices                 = cfg.normalization_choices
-dataset_path            = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-metric_choices          = cfg.metric_choices_labels
-output_data_folder      = cfg.output_data_folder
-
-generic_output_file_svd = '_random.csv'
-
-def generate_data_model(_scenes_list, _filename, _transformations, _scenes, _nb_zones = 4, _random=0):
-
-    output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".test"
-
-    if not '/' in output_train_filename:
-        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
-
-    # create path if not exists
-    if not os.path.exists(output_data_folder):
-        os.makedirs(output_data_folder)
-
-    train_file_data = []
-    test_file_data  = []
-
-    scenes = os.listdir(dataset_path)
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    # go ahead each scenes
-    for id_scene, folder_scene in enumerate(_scenes_list):
-
-        scene_path = os.path.join(dataset_path, folder_scene)
-
-        zones_indices = zones
-
-        # shuffle list of zones (=> randomly choose zones)
-        # only in random mode
-        if _random:
-            random.shuffle(zones_indices)
-
-         # store zones learned
-        learned_zones_indices = zones_indices[:_nb_zones]
-
-        # write into file
-        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
-
-        if not os.path.exists(folder_learned_path):
-            os.makedirs(folder_learned_path)
-
-        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
-
-        with open(file_learned_path, 'w') as f:
-            for i in learned_zones_indices:
-                f.write(str(i) + ';')
-
-        for id_zone, index_folder in enumerate(zones_indices):
-
-            index_str = str(index_folder)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
-            
-            current_zone_folder = "zone" + index_str
-            zone_path = os.path.join(scene_path, current_zone_folder)
-
-            # custom path for interval of reconstruction and metric
-
-            metrics_path = []
-
-            for transformation in _transformations:
-                metric_interval_path = os.path.join(zone_path, transformation.getTranformationPath())
-                metrics_path.append(metric_interval_path)
-
-            # as labels are same for each metric
-            for label in os.listdir(metrics_path[0]):
-
-                label_metrics_path = []
-
-                for path in metrics_path:
-                    label_path = os.path.join(path, label)
-                    label_metrics_path.append(label_path)
-
-                # getting images list for each metric
-                metrics_images_list = []
-                    
-                for label_path in label_metrics_path:
-                    images = sorted(os.listdir(label_path))
-                    metrics_images_list.append(images)
-
-                # construct each line using all images path of each
-                for index_image in range(0, len(metrics_images_list)):
-                    
-                    images_path = []
-
-                    # getting images with same index and hence name for each metric (transformation)
-                    for index_metric in range(0, len(metrics_path)):
-                        img_path = metrics_images_list[index_metric][index_image]
-                        images_path.append(img_path)
-
-                    if label == cfg.noisy_folder:
-                        line = '1;'
-                    else:
-                        line = '0;'
-
-                    # compute line information with all images paths
-                    for id_path, img_path in enumerate(images_path):
-                        if id_path < len(images_path) - 1:
-                            line = line + img_path + '::'
-                        else:
-                            line = line + img_path
-                    
-                    line = line + '\n'
-
-                    if id_zone < _nb_zones and folder_scene in _scenes:
-                        train_file_data.append(line)
-                    else:
-                        test_file_data.append(line)
-
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
-    random.shuffle(train_file_data)
-    random.shuffle(test_file_data)
-
-    for line in train_file_data:
-        train_file.write(line)
-
-    for line in test_file_data:
-        test_file.write(line)
-
-    train_file.close()
-    test_file.close()
-
-def main():
-
-    parser = argparse.ArgumentParser(description="Compute specific dataset for model using of metric")
-
-    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
-    parser.add_argument('--metrics', type=str, 
-                                     help="list of metrics choice in order to compute data",
-                                     default='svd_reconstruction, ipca_reconstruction',
-                                     required=True)
-    parser.add_argument('--params', type=str, 
-                                    help="list of specific param for each metric choice (See README.md for further information in 3D mode)", 
-                                    default='100, 200 :: 50, 25',
-                                    required=True)
-    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
-    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
-    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
-    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
-
-    args = parser.parse_args()
-
-    p_filename = args.output
-    p_metrics  = list(map(str.strip, args.metrics.split(',')))
-    p_params   = list(map(str.strip, args.params.split('::')))
-    p_scenes   = args.scenes.split(',')
-    p_nb_zones = args.nb_zones
-    p_renderer = args.renderer
-    p_random   = args.random
-
-    # create list of Transformation
-    transformations = []
-
-    for id, metric in enumerate(p_metrics):
-
-        if metric not in metric_choices:
-            raise ValueError("Unknown metric, please select a correct metric : ", metric_choices)
-
-        transformations.append(Transformation(metric, p_params[id]))
-
-    # list all possibles choices of renderer
-    scenes_list = dt.get_renderer_scenes_names(p_renderer)
-    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
-
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indices.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # create database using img folder (generate first time only)
-    generate_data_model(scenes_list, p_filename, transformations, scenes_selected, p_nb_zones, p_random)
-
-if __name__== "__main__":
-    main()

+ 8 - 0
modules/models/metrics.py

@@ -0,0 +1,8 @@
+from keras import backend as K
+import tensorflow as tf
+
+def auc(y_true, y_pred):
+    auc = tf.metrics.auc(y_true, y_pred)[1]
+    K.get_session().run(tf.local_variables_initializer())
+    
+    return auc

+ 125 - 0
modules/models/models.py

@@ -0,0 +1,125 @@
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Sequential
+from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Conv3D, MaxPooling3D, AveragePooling3D
+from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras import backend as K
+import tensorflow as tf
+
+from modules.utils import config as cfg
+from modules.models import metrics
+
+
+def generate_model_2D(_input_shape):
+
+    model = Sequential()
+
+    model.add(Conv2D(60, (2, 2), input_shape=_input_shape))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Conv2D(40, (2, 2)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Conv2D(20, (2, 2)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Flatten())
+
+    model.add(Dense(140))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(120))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(80))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(40))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(20))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(1))
+    model.add(Activation('sigmoid'))
+
+    model.compile(loss='binary_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy', metrics.auc])
+
+    return model
+
+def generate_model_3D(_input_shape):
+
+    model = Sequential()
+
+    print(_input_shape)
+
+    model.add(Conv3D(60, (1, 2, 2), input_shape=_input_shape))
+    model.add(Activation('relu'))
+    model.add(MaxPooling3D(pool_size=(1, 2, 2)))
+
+    model.add(Conv3D(40, (1, 2, 2)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling3D(pool_size=(1, 2, 2)))
+
+    model.add(Conv3D(20, (1, 2, 2)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling3D(pool_size=(1, 2, 2)))
+
+    model.add(Flatten())
+
+    model.add(Dense(140))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(120))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(80))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(40))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(20))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.4))
+
+    model.add(Dense(1))
+    model.add(Activation('sigmoid'))
+
+    model.compile(loss='binary_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy', metrics.auc])
+
+    return model
+
+
+def get_model(n_channels, _input_shape):
+
+    if n_channels == 1:
+        return generate_model_2D(_input_shape)
+
+    if n_channels == 3:
+        return generate_model_3D(_input_shape)

+ 1 - 1
modules/utils/config.py

@@ -40,7 +40,7 @@ zones_indices                   = np.arange(16)
 
 metric_choices_labels           = ['all', 'svd_reconstruction', 'fast_ica_reconstruction', 'ipca_reconstruction']
 
-keras_epochs                    = 50
+keras_epochs                    = 30
 keras_batch                     = 32
 val_dataset_size                = 0.2
 

+ 11 - 7
predict_seuil_expe_curve.py

@@ -10,6 +10,7 @@ import subprocess
 import time
 
 from modules.utils import config as cfg
+from modules.utils import data as dt
 
 config_filename           = cfg.config_filename
 scenes_path               = cfg.dataset_path
@@ -32,8 +33,6 @@ current_dirpath = os.getcwd()
 
 def main():
 
-    p_custom = False
-        
     parser = argparse.ArgumentParser(description="Script which predicts threshold using specific keras model")
 
     parser.add_argument('--metrics', type=str, 
@@ -44,26 +43,31 @@ def main():
                                     help="list of specific param for each metric choice (See README.md for further information in 3D mode)", 
                                     default='100, 200 :: 50, 25',
                                     required=True)
-    parser.add_argument('--model', type=str, help='.json file of keras model')
+    parser.add_argument('--model', type=str, help='.json file of keras model', required=True)
+    parser.add_argument('--renderer', type=str, 
+                                      help='Renderer choice in order to limit scenes used', 
+                                      choices=cfg.renderer_choices, 
+                                      default='all', 
+                                      required=True)
 
     args = parser.parse_args()
 
     p_metrics    = list(map(str.strip, args.metrics.split(',')))
     p_params     = list(map(str.strip, args.params.split('::')))
     p_model_file = args.model
+    p_renderer   = args.renderer
 
-    args = parser.parse_args()
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
 
     scenes = os.listdir(scenes_path)
-    scenes = [s for s in scenes if s in maxwell_scenes]
 
     print(scenes)
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
 
-        # only take in consideration maxwell scenes
-        if folder_scene in maxwell_scenes:
+        # only take in consideration renderer scenes
+        if folder_scene in scenes_list:
 
             print(folder_scene)
 

+ 43 - 8
run.sh

@@ -15,18 +15,20 @@ if [ "${erased}" == "Y" ]; then
     echo 'model_name; global_train_size; global_test_size; filtered_train_size; filtered_test_size; f1_train; f1_test; recall_train; recall_test; presicion_train; precision_test; acc_train; acc_test; roc_auc_train; roc_auc_test;' >> ${file_path}
 fi
 
-renderer="maxwell"
-scenes="A, D, G, H"
+renderer="all"
+scenes="A, B, C, D, E, F, G, H, I"
 
 svd_metric="svd_reconstruction"
 ipca_metric="ipca_reconstruction"
 fast_ica_metric="fast_ica_reconstruction"
 
+all_metrics="${svd_metric},${ipca_metric},${fast_ica_metric}"
+
 # First compute svd_reconstruction
 
 for begin in {80,85,90,95,100,105,110}; do
   for end in {150,160,170,180,190,200}; do
-
+  
     python generate_reconstructed_data.py --metric ${svd_metric} --param "${begin}, ${end}"
 
     for zone in {6,8,10,12}; do
@@ -40,7 +42,7 @@ for begin in {80,85,90,95,100,105,110}; do
       
         echo "Run computation for SVD model ${OUTPUT_DATA_FILE}"
 
-        python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${svd_metric} --renderer ${renderer} --scenes ${scenes} --param "${begin}, ${end}" --nb_zones ${zone} --random 1
+        python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metrics ${svd_metric} --renderer ${renderer} --scenes ${scenes} --params "${begin}, ${end}" --nb_zones ${zone} --random 1
         
         python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
       fi
@@ -50,9 +52,9 @@ done
 
 
 # computation of ipca_reconstruction
-ipca_batch_size=25
+ipca_batch_size=55
 
-for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
+for component in {10,15,20,25,30,35,45,50}; do
   python generate_reconstructed_data.py --metric ${ipca_metric} --param "${component},${ipca_batch_size}"
 
   for zone in {6,8,10,12}; do
@@ -66,7 +68,7 @@ for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; d
     
       echo "Run computation for IPCA model ${OUTPUT_DATA_FILE}"
 
-      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${ipca_metric} --renderer ${renderer} --scenes ${scenes} --param "${component},${ipca_batch_size}" --nb_zones ${zone} --random 1
+      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metrics ${ipca_metric} --renderer ${renderer} --scenes ${scenes} --params "${component},${ipca_batch_size}" --nb_zones ${zone} --random 1
       python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
     fi
   done
@@ -89,9 +91,42 @@ for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; d
     
       echo "Run computation for Fast ICA model ${OUTPUT_DATA_FILE}"
 
-      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${fast_ica_metric} --renderer ${renderer} --scenes ${scenes} --param "${component}" --nb_zones ${zone} --random 1
+      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metrics ${fast_ica_metric} --renderer ${renderer} --scenes ${scenes} --params "${component}" --nb_zones ${zone} --random 1
       
       python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
     fi
   done
 done
+
+# RUN LATER
+# compute using all transformation methods
+ipca_batch_size=55
+
+: '
+for begin in {80,85,90,95,100,105,110}; do
+  for end in {150,160,170,180,190,200}; do
+    for ipca_component in {10,15,20,25,30,35,45,50}; do
+      for fast_ica_component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
+        for zone in {6,8,10,12}; do
+          OUTPUT_DATA_FILE="${svd_metric}_B${begin}_E${end}_${ipca_metric}__N${ipca_component}_BS${ipca_batch_size}_${fast_ica_metric}_N${fast_ica_component}_nb_zones_${zone}"
+
+          if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+            
+            echo "Transformation combination model ${OUTPUT_DATA_FILE} already generated"
+          
+          else
+          
+            echo "Run computation for Transformation combination model ${OUTPUT_DATA_FILE}"
+
+            params="${begin}, ${end} :: ${ipca_component}, ${ipca_batch_size} :: ${fast_ica_component}"
+
+            python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metrics ${all_metrics} --renderer ${renderer} --scenes ${scenes} --params "${params}" --nb_zones ${zone} --random 1
+            
+            python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
+          fi
+        done
+      done
+    done
+  done
+done
+'

+ 0 - 34
run_test.sh

@@ -1,34 +0,0 @@
-#!/bin/bash
-
-erased=$1
-
-# file which contains model names we want to use for simulation
-file_path="models_info/models_comparisons.csv"
-
-if [ "${erased}" == "Y" ]; then
-    echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p models_info
-    touch ${file_path}
-
-    # add of header
-    echo 'model_name; global_train_size; global_test_size; filtered_train_size; filtered_test_size; f1_train; f1_test; recall_train; recall_test; presicion_train; precision_test; acc_train; acc_test; roc_auc_train; roc_auc_test;' >> ${file_path}
-fi
-
-renderer="maxwell"
-scenes="A, D, G, H"
-
-svd_metric="svd_reconstruction"
-ipca_metric="ipca_reconstruction"
-fast_ica_metric="fast_ica_reconstruction"
-
-metrics="${svd_metric},${ipca_metric},${fast_ica_metric}"
-
-python generate_reconstructed_data.py --metric ${svd_metric} --param "100, 200"
-python generate_reconstructed_data.py --metric ${ipca_reconstruction} --param "50, 10"
-python generate_reconstructed_data.py --metric ${fast_ica_metric} --param "50"
-
-OUTPUT_DATA_FILE="test_3D_model"
-
-python generate_dataset_3D.py --output data/${OUTPUT_DATA_FILE} --metrics ${metrics} --renderer ${renderer} --scenes ${scenes} --params "100, 200 :: 50, 10 :: 50" --nb_zones ${zone} --random 1
-python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} --n_channels 3

+ 30 - 85
train_model.py

@@ -7,79 +7,12 @@ import cv2
 
 from sklearn.utils import shuffle
 
-from keras.preprocessing.image import ImageDataGenerator
-from keras.models import Sequential
-from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
-from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
-from keras import backend as K
-import tensorflow as tf
-
-from keras.utils import plot_model
-
 from modules.utils import config as cfg
-from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
-
-img_width, img_height = cfg.keras_img_size
-batch_size = 32
-
-def auc(y_true, y_pred):
-    auc = tf.metrics.auc(y_true, y_pred)[1]
-    K.get_session().run(tf.local_variables_initializer())
-    
-    return auc
-
-def generate_model(_input_shape):
-
-    model = Sequential()
-
-    model.add(Conv2D(60, (2, 2), input_shape=_input_shape))
-    model.add(Activation('relu'))
-    model.add(MaxPooling2D(pool_size=(2, 2)))
-
-    model.add(Conv2D(40, (2, 2)))
-    model.add(Activation('relu'))
-    model.add(MaxPooling2D(pool_size=(2, 2)))
-
-    model.add(Conv2D(20, (2, 2)))
-    model.add(Activation('relu'))
-    model.add(MaxPooling2D(pool_size=(2, 2)))
-
-    model.add(Flatten())
-
-    model.add(Dense(140))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.4))
-
-    model.add(Dense(120))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.4))
-
-    model.add(Dense(80))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.4))
+from modules.models import models
 
-    model.add(Dense(40))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.4))
-
-    model.add(Dense(20))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.4))
-
-    model.add(Dense(1))
-    model.add(Activation('sigmoid'))
-
-    model.compile(loss='binary_crossentropy',
-                  optimizer='rmsprop',
-                  metrics=['accuracy', auc])
-
-    return model
+from keras import backend as K
 
+from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
 
 def main():
 
@@ -90,7 +23,6 @@ def main():
     parser.add_argument('--batch_size', type=int, help='batch size used as model input', default=cfg.keras_batch)
     parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=cfg.keras_epochs)
     parser.add_argument('--val_size', type=int, help='percent of validation data during training process', default=cfg.val_dataset_size)
-    parser.add_argument('--n_channels', type=int, help='number of canals for 3D', default=1)
 
     args = parser.parse_args()
 
@@ -99,7 +31,6 @@ def main():
     p_batch_size = args.batch_size
     p_epochs     = args.epochs
     p_val_size   = args.val_size
-    p_n_channels = args.n_channels
         
     ########################
     # 1. Get and prepare data
@@ -118,15 +49,35 @@ def main():
     print("Reading all images data...")
 
     # getting number of chanel
-    n_channels = len(dataset_train[1].split(':'))
+    n_channels = len(dataset_train[1][1].split('::'))
+    print("Number of channels : ", n_channels)
+
+    img_width, img_height = cfg.keras_img_size
+
+    # specify the number of dimensions
+    if K.image_data_format() == 'channels_first':
+        if n_channels > 1:
+            input_shape = (1, n_channels, img_width, img_height)
+        else:
+            input_shape = (n_channels, img_width, img_height)
+
+    else:
+        if n_channels > 1:
+            input_shape = (1, img_width, img_height, n_channels)
+        else:
+            input_shape = (img_width, img_height, n_channels)
 
     # `:` is the separator used for getting each img path
-    if p_n_channels > 1:
-        dataset_train[1] = dataset_train[1].split(':').apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE).reshape(input_shape))
-        dataset_test[1] = dataset_test[1].split(':').apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE).reshape(input_shape))
+    if n_channels > 1:
+        dataset_train[1] = dataset_train[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
+        dataset_test[1] = dataset_test[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
     else:
-        dataset_train[1] = dataset_train[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE).reshape(input_shape))
-        dataset_test[1] = dataset_test[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE).reshape(input_shape))
+        dataset_train[1] = dataset_train[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
+        dataset_test[1] = dataset_test[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
+
+    # reshape array data
+    dataset_train[1] = dataset_train[1].apply(lambda x: np.array(x).reshape(input_shape))
+    dataset_test[1] = dataset_test[1].apply(lambda x: np.array(x).reshape(input_shape))
 
     # get dataset with equal number of classes occurences
     noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
@@ -178,13 +129,7 @@ def main():
     # 2. Getting model
     #######################
 
-        # specify the number of dimensions
-    if K.image_data_format() == 'channels_first':
-        input_shape = (n_channels, img_width, img_height)
-    else:
-        input_shape = (img_width, img_height, n_channels)
-
-    model = generate_model(input_shape)
+    model = models.get_model(n_channels, input_shape)
     model.summary()
  
     model.fit(x_data_train, y_dataset_train.values, validation_split=p_val_size, epochs=p_epochs, batch_size=p_batch_size)