Browse Source

Merge branch 'release/v0.1.0'

Jérôme BUISINE, 4 years ago
Parent commit: a791778cae

+ 2 - 0
.gitignore

@@ -7,9 +7,11 @@ __pycache__
 # by default avoid model files and png files
 *.h5
 *.png
+saved_models
 !saved_models/*.h5
 !saved_models/*.png
 
 # data
 learned_zones
 dataset
+models_info

+ 17 - 20
README.md

@@ -8,37 +8,34 @@ pip install -r requirements.txt
 
 ## How to use
 
-Generate dataset (run only once time or clean data folder before):
+Generate reconstructed data from a specific reconstruction method (run only once, or clean the data folder first):
 ```
-python generate_dataset.py
+python generate_reconstructed_data.py -h
 ```
 
-It will split scenes and generate all data you need for your neural network.
-You can specify the number of sub images you want in the script by modifying **_NUMBER_SUB_IMAGES_** variable or using parameter.
-
+Generate a custom dataset from one reconstruction method, or several (to be implemented later):
 ```
-python generate_dataset.py --nb xxxx
+python generate_dataset.py -h
 ```
 
-There are 3 kinds of Neural Networks:
-- **classification_cnn_keras.py**: *based on cropped images and do convolution*
-- **classification_cnn_keras_cross_validation.py**: *based on cropped images and do convolution. Data are randomly split for training*
-- **classification_cnn_keras_svd.py**: *based on svd metrics of image*
-
-
-After your built your neural network in classification_cnn_keras.py, you just have to run it:
-
-```
-python classification_cnn_keras_svd.py --directory xxxx --output xxxxx --batch_size xx --epochs xx --img xx (or --image_width xx --img_height xx)
-```
+### Reconstruction parameters
 
-A config file in json is available and keeps in memory all image sizes available.
+Expected parameters for each reconstruction method:
+- **svd:** Singular Value Decomposition
+  - Param definition: *interval of data used for reconstruction (begin, end)*
+  - Example: *"100, 200"*
+- **ipca:** Incremental Principal Component Analysis
+  - Param definition: *number of components used for compression and batch size*
+  - Example: *"50, 32"*
+- **fast_ica:** Fast Independent Component Analysis
+  - Param definition: *number of components used for compression*
+  - Example: *"50"*
 
 ## Modules
 
 This project contains modules:
-- **modules/image_metrics**: *where all computed metrics function are developed*
-- **modules/model_helper**: *contains helpful function to save or display model information and performance*
+- **modules/utils/config.py**: *stores all configuration information about the project and dataset*
+- **modules/utils/data.py**: *useful helper methods for the dataset*
 
 All these modules will be enhanced during development of the project
 

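For example, with the parameter formats documented in the README above, the two generation steps can be chained like this (a sketch mirroring the run.sh changes further down; the scene list, zone count and output name are arbitrary here):

```
python generate_reconstructed_data.py --metric svd_reconstruction --param "100, 200"
python generate_dataset.py --output data/svd_reconstruction_example --metric svd_reconstruction --renderer "maxwell" --scenes "A, D, G, H" --param "100, 200" --nb_zones 10 --random 1
```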
+ 0 - 25
TODO.md

@@ -1,25 +0,0 @@
-# TODO:
-
-## Data preparation
-
-- Split the images into 2 folders (noisy, not noisy)
-  - By scene
-  - By zone
-  - By metric [scene, zone]
-
-- Transform each image as desired (here, SV reconstruction with 110 weak components)
-- For each image, add its 4 rotated forms (to augment the dataset)
-
-## Data loading
-- Load all the images (as "path", "label" pairs)
-- Set up class balancing
-- Shuffle the whole dataset
-- Split the data (train, validation, test)
-
-## Model design
-- Set up a CNN model
-- Use BatchNormalization / Dropout
-
-
-## If it does not work
-- Use a transfer learning approach

+ 12 - 13
generate_dataset.py

@@ -19,7 +19,8 @@ from skimage import color
 from modules.utils import config as cfg
 from modules.utils import data as dt
 
-from preprocessing_functions import svd_reconstruction
+from transformation_functions import svd_reconstruction
+from modules.classes.Transformation import Transformation
 
 # getting configuration information
 config_filename         = cfg.config_filename
@@ -40,7 +41,7 @@ output_data_folder      = cfg.output_data_folder
 
 generic_output_file_svd = '_random.csv'
 
-def generate_data_model(_scenes_list, _filename, _interval,  _metric, _scenes, _nb_zones = 4, _random=0):
+def generate_data_model(_scenes_list, _filename, _transformation, _scenes, _nb_zones = 4, _random=0):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -58,7 +59,6 @@ def generate_data_model(_scenes_list, _filename, _interval,  _metric, _scenes, _
     scenes = os.listdir(path)
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
-    begin, end = _interval
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(_scenes_list):
@@ -96,16 +96,13 @@ def generate_data_model(_scenes_list, _filename, _interval,  _metric, _scenes, _
             current_zone_folder = "zone" + index_str
             zone_path = os.path.join(scene_path, current_zone_folder)
 
-            # custom path for metric
-            metric_path = os.path.join(zone_path, _metric)
-
             # custom path for interval of reconstruction and metric
-            metric_interval_path = os.path.join(metric_path, str(begin) + "_" + str(end))
+            metric_interval_path = os.path.join(zone_path, _transformation.getTransformationPath())
 
             for label in os.listdir(metric_interval_path):
                 label_path = os.path.join(metric_interval_path, label)
 
-                images = os.listdir(label_path)
+                images = sorted(os.listdir(label_path))
 
                 for img in images:
                     img_path = os.path.join(label_path, img)
@@ -144,7 +141,7 @@ def main():
                                     help="metric choice in order to compute data (use 'all' if all metrics are needed)", 
                                     help="metric choice in order to compute data (use 'all' if all metrics are needed)", 
                                     choices=metric_choices,
                                     choices=metric_choices,
                                     required=True)
                                     required=True)
-    parser.add_argument('--interval', type=str, help="interval choice if needed by the compression method", default='"100, 200"')
+    parser.add_argument('--param', type=str, help="specific param for metric (See README.md for further information)")
     parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
     parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
     parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
     parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
     parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
     parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
@@ -154,13 +151,16 @@ def main():
 
     p_filename = args.output
     p_metric   = args.metric
-    p_interval = list(map(int, args.interval.split(',')))
+    p_param    = args.param
     p_scenes   = args.scenes.split(',')
     p_nb_zones = args.nb_zones
     p_renderer = args.renderer
     p_random   = args.random
 
-        # list all possibles choices of renderer
+    # create new Transformation obj
+    transformation = Transformation(p_metric, p_param)
+
+    # list all possibles choices of renderer
     scenes_list = dt.get_renderer_scenes_names(p_renderer)
     scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
 
@@ -172,8 +172,7 @@ def main():
         scenes_selected.append(scenes_list[index])
 
     # create database using img folder (generate first time only)
-    generate_data_model(scenes_list, p_filename, p_interval,  p_metric, scenes_selected, p_nb_zones, p_random)
-
+    generate_data_model(scenes_list, p_filename, transformation, scenes_selected, p_nb_zones, p_random)
 
 if __name__== "__main__":
     main()

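The dataset script above now locates reconstructed images through `Transformation.getTransformationPath()`, so it expects the folder layout produced by `generate_reconstructed_data.py`. Roughly, for the svd example (the label folder names come from `cfg.noisy_folder` / `cfg.not_noisy_folder`, whose exact values live in the project config):

```
<scene>/zone<XX>/svd_reconstruction/100_200/<noisy_folder>/*.png
<scene>/zone<XX>/svd_reconstruction/100_200/<not_noisy_folder>/*.png
```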
+ 29 - 27
generate_reconstructed_data.py

@@ -17,7 +17,7 @@ from ipfml import processing, metrics, utils
 from skimage import color
 
 from modules.utils import config as cfg
-from preprocessing_functions import svd_reconstruction
+from modules.classes.Transformation import Transformation
 
 # getting configuration information
 config_filename         = cfg.config_filename
@@ -37,18 +37,15 @@ output_data_folder      = cfg.output_data_folder
 
 generic_output_file_svd = '_random.csv'
 
-def generate_data_svd(data_type, interval):
+def generate_data(transformation):
     """
     @brief Method which generates all .csv files from scenes
-    @param data_type,  metric choice
-    @param interval, interval choice used by reconstructed method
     @return nothing
     """
 
     scenes = os.listdir(path)
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
-    begin, end = interval
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
@@ -84,13 +81,13 @@ def generate_data_svd(data_type, interval):
                 zones_threshold.append(int(f.readline()))
 
             # custom path for metric
-            metric_path = os.path.join(zone_path, data_type)
+            metric_path = os.path.join(zone_path, transformation.getName())
 
             if not os.path.exists(metric_path):
                 os.makedirs(metric_path)
 
             # custom path for interval of reconstruction and metric
-            metric_interval_path = os.path.join(metric_path, str(begin) + "_" + str(end))
+            metric_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
             metrics_folder.append(metric_interval_path)
 
             if not os.path.exists(metric_interval_path):
@@ -128,7 +125,7 @@ def generate_data_svd(data_type, interval):
                 ##########################
                 # Image computation part #
                 ##########################
-                output_block = svd_reconstruction(block, [begin, end])
+                output_block = transformation.getTransformedImage(block)
                 output_block = np.array(output_block, 'uint8')
 
                 # current output image
@@ -142,21 +139,30 @@ def generate_data_svd(data_type, interval):
                 else:
                     label_path = os.path.join(label_path, cfg.noisy_folder)
 
+                # Data augmentation!
                 rotations = [0, 90, 180, 270]
+                img_flip_labels = ['original', 'horizontal', 'vertical', 'both']
 
-                # rotate image to increase dataset size
-                for rotation in rotations:
-                    rotated_output_img = output_block_img.rotate(rotation)
+                horizontal_img = output_block_img.transpose(Image.FLIP_LEFT_RIGHT)
+                vertical_img = output_block_img.transpose(Image.FLIP_TOP_BOTTOM)
+                both_img = output_block_img.transpose(Image.TRANSPOSE)
 
-                    output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block]
-                    output_reconstructed_filename = output_reconstructed_filename + '_' + str(rotation) + '.png'
-                    output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
+                flip_images = [output_block_img, horizontal_img, vertical_img, both_img]
 
-                    rotated_output_img.save(output_reconstructed_path)
+                # rotate and flip image to increase dataset size
+                for id, flip in enumerate(flip_images):
+                    for rotation in rotations:
+                        rotated_output_img = flip.rotate(rotation)
+
+                        output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block]
+                        output_reconstructed_filename = output_reconstructed_filename + '_' + img_flip_labels[id] + '_' + str(rotation) + '.png'
+                        output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
+
+                        rotated_output_img.save(output_reconstructed_path)
 
 
             start_index_image_int = int(start_index_image)
-            print(data_type + "_" + folder_scene + " - " + "{0:.2f}".format((current_counter_index - start_index_image_int) / (end_counter_index - start_index_image_int)* 100.) + "%")
+            print(transformation.getName() + "_" + folder_scene + " - " + "{0:.2f}".format((current_counter_index - start_index_image_int) / (end_counter_index - start_index_image_int)* 100.) + "%")
             sys.stdout.write("\033[F")
 
             current_counter_index += step_counter
@@ -164,7 +170,7 @@ def generate_data_svd(data_type, interval):
 
         print('\n')
 
-    print("%s_%s : end of data generation\n" % (data_type, interval))
+    print("%s_%s : end of data generation\n" % (transformation.getName(), transformation.getParam()))
 
 
 def main():
@@ -172,25 +178,21 @@ def main():
     parser = argparse.ArgumentParser(description="Compute and prepare data of metric of all scenes using specific interval if necessary")
     parser = argparse.ArgumentParser(description="Compute and prepare data of metric of all scenes using specific interval if necessary")
 
 
     parser.add_argument('--metric', type=str, 
     parser.add_argument('--metric', type=str, 
-                                    help="metric choice in order to compute data (use 'all' if all metrics are needed)", 
+                                    help="metric choice in order to compute data", 
                                     choices=metric_choices,
                                     choices=metric_choices,
                                     required=True)
                                     required=True)
 
 
-    parser.add_argument('--interval', type=str, 
-                                    help="interval choice if needed by the compression method", 
-                                    default='"100, 200"')
+    parser.add_argument('--param', type=str, help="specific param for metric (See README.md for further information)")
 
 
     args = parser.parse_args()
     args = parser.parse_args()
 
 
     p_metric   = args.metric
     p_metric   = args.metric
-    p_interval = list(map(int, args.interval.split(',')))
+    p_param    = args.param
+
+    transformation = Transformation(p_metric, p_param)
 
 
     # generate all or specific metric data
     # generate all or specific metric data
-    if p_metric == 'all':
-        for m in metric_choices:
-            generate_data_svd(m, p_interval)
-    else:
-        generate_data_svd(p_metric, p_interval)
+    generate_data(transformation)
 
 
 if __name__== "__main__":
 if __name__== "__main__":
     main()
     main()

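For reference, the flip/rotation augmentation added above yields 4 flip variants x 4 rotations = 16 images per block. A minimal standalone sketch of the same idea (assuming Pillow is installed; `block.png` is a placeholder file name):

```
from PIL import Image

# reproduce the augmentation used in generate_reconstructed_data.py on a single block
img = Image.open('block.png')

flips = {
    'original': img,
    'horizontal': img.transpose(Image.FLIP_LEFT_RIGHT),
    'vertical': img.transpose(Image.FLIP_TOP_BOTTOM),
    'both': img.transpose(Image.TRANSPOSE),  # same constant the script uses for its 'both' label
}

for flip_label, flipped in flips.items():
    for rotation in (0, 90, 180, 270):
        # 4 flips x 4 rotations = 16 augmented images per block
        flipped.rotate(rotation).save('block_{}_{}.png'.format(flip_label, rotation))
```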
+ 53 - 0
modules/classes/Transformation.py

@@ -0,0 +1,53 @@
+import os
+
+from transformation_functions import svd_reconstruction, fast_ica_reconstruction, ipca_reconstruction
+
+# Transformation class to store the transformation method applied to an image and get useful information about it
+class Transformation():
+
+    def __init__(self, _transformation, _param):
+        self.transformation = _transformation
+        self.param = _param
+
+    def getTransformedImage(self, img):
+
+        if self.transformation == 'svd_reconstruction':
+            begin, end = list(map(int, self.param.split(',')))
+            data = svd_reconstruction(img, [begin, end])
+
+        if self.transformation == 'ipca_reconstruction':
+            n_components, batch_size = list(map(int, self.param.split(',')))
+            data = ipca_reconstruction(img, n_components, batch_size)
+
+        if self.transformation == 'fast_ica_reconstruction':
+            n_components = int(self.param)
+            data = fast_ica_reconstruction(img, n_components)
+
+        return data
+    
+    def getTransformationPath(self):
+
+        path = self.transformation
+
+        if self.transformation == 'svd_reconstruction':
+            begin, end = list(map(int, self.param.split(',')))
+            path = os.path.join(path, str(begin) + '_' + str(end))
+
+        if self.transformation == 'ipca_reconstruction':
+            n_components, batch_size = list(map(int, self.param.split(',')))
+            path = os.path.join(path, 'N' + str(n_components) + '_' + str(batch_size))
+
+        if self.transformation == 'fast_ica_reconstruction':
+            n_components = self.param
+            path = os.path.join(path, 'N' + str(n_components))
+
+        return path
+
+    def getName(self):
+        return self.transformation
+
+    def getParam(self):
+        return self.param
+
+    def __str__( self ):
+        return self.transformation + ' transformation with parameter : ' + self.param

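A minimal usage sketch of the new class (assuming the project root is on the Python path and its dependencies, ipfml and scikit-learn, are installed; the image block argument is only hinted at here):

```
from modules.classes.Transformation import Transformation

# same (metric, param) strings the CLI scripts receive
transformation = Transformation('svd_reconstruction', '100, 200')

print(transformation.getName())                # svd_reconstruction
print(transformation.getTransformationPath())  # svd_reconstruction/100_200
print(transformation)                          # svd_reconstruction transformation with parameter : 100, 200

# reconstructed = transformation.getTransformedImage(block)  # block: an image block, e.g. a 200x200 RGB numpy array
```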
+ 0 - 0
modules/classes/__init__.py


+ 0 - 75
modules/models.py

@@ -1,75 +0,0 @@
-from sklearn.model_selection import GridSearchCV
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier, VotingClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-import sklearn.svm as svm
-
-
-def _get_best_model(X_train, y_train):
-
-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
-
-    svc = svm.SVC(probability=True)
-    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=10)
-
-    clf.fit(X_train, y_train)
-
-    model = clf.best_estimator_
-
-    return model
-
-def svm_model(X_train, y_train):
-
-    return _get_best_model(X_train, y_train)
-
-
-def ensemble_model(X_train, y_train):
-
-    svm_model = _get_best_model(X_train, y_train)
-
-    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
-    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
-
-    ensemble_model = VotingClassifier(estimators=[
-       ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)], voting='soft', weights=[1,1,1])
-
-    ensemble_model.fit(X_train, y_train)
-
-    return ensemble_model
-
-
-def ensemble_model_v2(X_train, y_train):
-
-    svm_model = _get_best_model(X_train, y_train)
-    knc_model = KNeighborsClassifier(n_neighbors=2)
-    gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
-    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
-    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
-
-    ensemble_model = VotingClassifier(estimators=[
-       ('lr', lr_model),
-       ('knc', knc_model),
-       ('gbc', gbc_model),
-       ('svm', svm_model),
-       ('rf', rf_model)],
-       voting='soft', weights=[1, 1, 1, 1, 1])
-
-    ensemble_model.fit(X_train, y_train)
-
-    return ensemble_model
-
-def get_trained_model(choice, X_train, y_train):
-
-    if choice == 'svm_model':
-        return svm_model(X_train, y_train)
-
-    if choice == 'ensemble_model':
-        return ensemble_model(X_train, y_train)
-
-    if choice == 'ensemble_model_v2':
-        return ensemble_model_v2(X_train, y_train)
-
-

+ 4 - 3
modules/utils/config.py

@@ -38,7 +38,8 @@ cycle_scenes_indices            = ['E', 'I']
 normalization_choices           = ['svd', 'svdn', 'svdne']
 zones_indices                   = np.arange(16)
 
-metric_choices_labels           = ['all', 'svd_reconstruction']
+metric_choices_labels           = ['all', 'svd_reconstruction', 'fast_ica_reconstruction', 'ipca_reconstruction']
 
-keras_epochs                    = 500
-keras_batch                     = 32
+keras_epochs                    = 50
+keras_batch                     = 32
+val_dataset_size                = 0.2

+ 1 - 12
modules/utils/data.py

@@ -1,6 +1,6 @@
 from ipfml import processing, metrics, utils
 from modules.utils.config import *
-from preprocessing_functions import svd_reconstruction
+from transformation_functions import svd_reconstruction
 
 from PIL import Image
 from skimage import color
@@ -22,17 +22,6 @@ _scenes_indices_prefix = '_scenes_indices'
 context_vars = vars()
 
 
-def get_data(data_type, block, interval=(100, 200)):
-    """
-    Method which returns the data type expected
-    """
-
-    if data_type == 'svd_reconstruct':
-        begin, end = interval
-        data = svd_reconstruction(block, [begin, end])
-
-    return data
-
 def get_renderer_scenes_indices(renderer_name):
 
     if renderer_name not in renderer_choices:

+ 0 - 23
preprocessing_functions.py

@@ -1,23 +0,0 @@
-from numpy.linalg import svd
-from PIL import Image
-from scipy import misc
-
-import time
-
-import numpy as np
-from ipfml import metrics
-
-def svd_reconstruction(img, interval):
-    
-    begin, end = interval
-    lab_img = metrics.get_LAB_L(img)
-    lab_img = np.array(lab_img, 'uint8')
-    
-    U, s, V = svd(lab_img, full_matrices=True)
-    
-    # reconstruction using specific interval
-    smat = np.zeros((end-begin, end-begin), dtype=complex)
-    smat[:, :] = np.diag(s[begin:end])
-    output_img = np.dot(U[:, begin:end],  np.dot(smat, V[begin:end, :]))
-        
-    return output_img

+ 85 - 5
run.sh

@@ -1,18 +1,98 @@
 #!/bin/bash
 
-metric="svd_reconstruction"
+erased=$1
+
+# file which contains model names we want to use for simulation
+file_path="models_info/models_comparisons.csv"
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add csv header
+    echo 'model_name; global_train_size; global_test_size; filtered_train_size; filtered_test_size; f1_train; f1_test; recall_train; recall_test; precision_train; precision_test; acc_train; acc_test; roc_auc_train; roc_auc_test;' >> ${file_path}
+fi
+
+renderer="maxwell"
+scenes="A, D, G, H"
+
+svd_metric="svd_reconstruction"
+ipca_metric="ipca_reconstruction"
+fast_ica_metric="fast_ica_reconstruction"
+
+# First compute svd_reconstruction
 
 for begin in {80,85,90,95,100,105,110}; do
   for end in {150,160,170,180,190,200}; do
 
-    # python generate_reconstructed_data.py --metric ${metric} --interval "${begin}, ${end}"
+    python generate_reconstructed_data.py --metric ${svd_metric} --param "${begin}, ${end}"
 
     for zone in {6,8,10,12}; do
-      OUTPUT_DATA_FILE="${metric}_nb_zones_${zone}_B${begin}_E${end}"
+      OUTPUT_DATA_FILE="${svd_metric}_nb_zones_${zone}_B${begin}_E${end}"
 
-      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${metric} --renderer "maxwell" --scenes "A, D, G, H" --interval "${begin}, ${end}" --nb_zones ${zone} --random 1
+      if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+
+        echo "SVD model ${OUTPUT_DATA_FILE} already generated"
+
+      else
 
-      python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE}
+        echo "Run computation for SVD model ${OUTPUT_DATA_FILE}"
+
+        python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${svd_metric} --renderer ${renderer} --scenes "${scenes}" --param "${begin}, ${end}" --nb_zones ${zone} --random 1
+        
+        python train_model_2D.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
+      fi
     done
   done
 done
+
+
+# computation of ipca_reconstruction
+ipca_batch_size=25
+
+for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
+  python generate_reconstructed_data.py --metric ${ipca_metric} --param "${component},${ipca_batch_size}"
+
+  for zone in {6,8,10,12}; do
+    OUTPUT_DATA_FILE="${ipca_metric}_nb_zones_${zone}_N${component}_BS${ipca_batch_size}"
+
+    if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+      
+      echo "IPCA model ${OUTPUT_DATA_FILE} already generated"
+    
+    else
+    
+      echo "Run computation for IPCA model ${OUTPUT_DATA_FILE}"
+
+      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${ipca_metric} --renderer ${renderer} --scenes "${scenes}" --param "${component},${ipca_batch_size}" --nb_zones ${zone} --random 1
+      
+      python train_model_2D.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
+    fi
+  done
+done
+
+
+# computation of fast_ica_reconstruction
+
+for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
+  python generate_reconstructed_data.py --metric ${fast_ica_metric} --param "${component}"
+
+  for zone in {6,8,10,12}; do
+    OUTPUT_DATA_FILE="${fast_ica_metric}_nb_zones_${zone}_N${component}"
+
+    if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+      
+      echo "Fast ICA model ${OUTPUT_DATA_FILE} already generated"
+    
+    else
+    
+      echo "Run computation for Fast ICA model ${OUTPUT_DATA_FILE}"
+
+      python generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${fast_ica_metric} --renderer ${renderer} --scenes "${scenes}" --param "${component}" --nb_zones ${zone} --random 1
+      
+      python train_model_2D.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
+    fi
+  done
+done

+ 73 - 16
train_model.py

@@ -12,10 +12,12 @@ from keras.models import Sequential
 from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
 from keras import backend as K
+import tensorflow as tf
+
 from keras.utils import plot_model
 
 from modules.utils import config as cfg
-from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
 
 img_width, img_height = 200, 200
 batch_size = 32
@@ -26,6 +28,11 @@ if K.image_data_format() == 'channels_first':
 else:
     input_shape = (img_width, img_height, 1)
 
+def auc(y_true, y_pred):
+    auc = tf.metrics.auc(y_true, y_pred)[1]
+    K.get_session().run(tf.local_variables_initializer())
+    return auc
 
 def generate_model(_input_shape):
 
@@ -75,7 +82,7 @@ def generate_model(_input_shape):
 
     model.compile(loss='binary_crossentropy',
                   optimizer='rmsprop',
-                  metrics=['accuracy'])
+                  metrics=['accuracy', auc])
 
     return model
 
@@ -84,13 +91,19 @@ def main():
 
     parser = argparse.ArgumentParser(description="Train Keras model and save it into .json file")
 
-    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)')
-    parser.add_argument('--output', type=str, help='output file name desired for model (without .json extension)')
+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)', required=True)
+    parser.add_argument('--output', type=str, help='output file name desired for model (without .json extension)', required=True)
+    parser.add_argument('--batch_size', type=int, help='batch size used as model input', default=cfg.keras_batch)
+    parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=cfg.keras_epochs)
+    parser.add_argument('--val_size', type=float, help='fraction of the training data used for validation', default=cfg.val_dataset_size)
 
     args = parser.parse_args()
 
-    p_data_file = args.data
-    p_output    = args.output
+    p_data_file  = args.data
+    p_output     = args.output
+    p_batch_size = args.batch_size
+    p_epochs     = args.epochs
+    p_val_size   = args.val_size
 
     ########################
     # 1. Get and prepare data
@@ -99,6 +112,9 @@ def main():
     dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
     dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
 
+    print("Train set size : ", len(dataset_train))
+    print("Test set size : ", len(dataset_test))
+
     # default first shuffle of data
     dataset_train = shuffle(dataset_train)
     dataset_test = shuffle(dataset_test)
@@ -140,10 +156,19 @@ def main():
         x_data_train.append(item[0])
 
     x_data_train = np.array(x_data_train)
+
+    x_data_test = []
+    for item in x_dataset_test.values:
+        #print("Item is here", item)
+        x_data_test.append(item[0])
+
+    x_data_test = np.array(x_data_test)
+
+
     print("End of loading data..")
 
-    print(x_data_train.shape)
-    print(x_data_train[0])
+    print("Train set size (after balancing) : ", final_df_train_size)
+    print("Test set size (after balancing) : ", final_df_test_size)
 
     #######################
     # 2. Getting model
@@ -151,10 +176,10 @@ def main():
 
     model = generate_model(input_shape)
     model.summary()
+
+    model.fit(x_data_train, y_dataset_train.values, validation_split=p_val_size, epochs=p_epochs, batch_size=p_batch_size)
 
-    model.fit(x_data_train, y_dataset_train.values, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
-
-    score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=cfg.keras_batch)
+    score = model.evaluate(x_data_test, y_dataset_test, batch_size=p_batch_size)
 
     if not os.path.exists(cfg.saved_models_folder):
         os.makedirs(cfg.saved_models_folder)
@@ -169,11 +194,43 @@ def main():
 
     model.save_weights(model_output_path.replace('.json', '.h5'))
 
-    # Save results obtained from model
-    y_test_prediction = model.predict(x_dataset_test)
-    print("Metrics : ", model.metrics_names)
-    print("Prediction : ", score)
-    print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))
+    # Get results obtained from model
+    y_train_prediction = model.predict(x_data_train)
+    y_test_prediction = model.predict(x_data_test)
+
+    y_train_prediction = [1 if x > 0.5 else 0 for x in y_train_prediction]
+    y_test_prediction = [1 if x > 0.5 else 0 for x in y_test_prediction]
+
+    acc_train_score = accuracy_score(y_dataset_train, y_train_prediction)
+    acc_test_score = accuracy_score(y_dataset_test, y_test_prediction)
+
+    f1_train_score = f1_score(y_dataset_train, y_train_prediction)
+    f1_test_score = f1_score(y_dataset_test, y_test_prediction)
+
+    recall_train_score = recall_score(y_dataset_train, y_train_prediction)
+    recall_test_score = recall_score(y_dataset_test, y_test_prediction)
+
+    pres_train_score = precision_score(y_dataset_train, y_train_prediction)
+    pres_test_score = precision_score(y_dataset_test, y_test_prediction)
+
+    roc_train_score = roc_auc_score(y_dataset_train, y_train_prediction)
+    roc_test_score = roc_auc_score(y_dataset_test, y_test_prediction)
+
+    # save model performance
+    if not os.path.exists(cfg.models_information_folder):
+        os.makedirs(cfg.models_information_folder)
+
+    perf_file_path = os.path.join(cfg.models_information_folder, cfg.csv_model_comparisons_filename)
+
+    with open(perf_file_path, 'a') as f:
+        line = p_output + ';' + str(len(dataset_train)) + ';' + str(len(dataset_test)) + ';' \
+                        + str(final_df_train_size) + ';' + str(final_df_test_size) + ';' \
+                        + str(acc_train_score) + ';' + str(acc_test_score) + ';' \
+                        + str(f1_train_score) + ';' + str(f1_test_score) + ';' \
+                        + str(recall_train_score) + ';' + str(recall_test_score) + ';' \
+                        + str(pres_train_score) + ';' + str(pres_test_score) + ';' \
+                        + str(roc_train_score) + ';' + str(roc_test_score) + '\n'
+        f.write(line)
 
 if __name__== "__main__":
     main()

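To make the new evaluation step in train_model.py concrete, here is a toy, standalone sketch of the thresholding and scoring it performs (the prediction values are made up):

```
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score

# sigmoid outputs from model.predict() have shape (n, 1)
y_true = np.array([0, 1, 1, 0, 1])
y_prob = np.array([[0.2], [0.8], [0.4], [0.1], [0.9]])

# same 0.5 threshold as in the script
y_pred = [1 if x > 0.5 else 0 for x in y_prob]

print(accuracy_score(y_true, y_pred), f1_score(y_true, y_pred),
      recall_score(y_true, y_pred), precision_score(y_true, y_pred),
      roc_auc_score(y_true, y_pred))
```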
+ 43 - 0
transformation_functions.py

@@ -0,0 +1,43 @@
+from numpy.linalg import svd
+from sklearn.decomposition import FastICA, IncrementalPCA
+
+import numpy as np
+
+from ipfml import metrics
+
+def svd_reconstruction(img, interval):
+    
+    begin, end = interval
+    lab_img = metrics.get_LAB_L(img)
+    lab_img = np.array(lab_img, 'uint8')
+    
+    U, s, V = svd(lab_img, full_matrices=True)
+    
+    # reconstruction using specific interval
+    smat = np.zeros((end-begin, end-begin), dtype=complex)
+    smat[:, :] = np.diag(s[begin:end])
+    output_img = np.dot(U[:, begin:end],  np.dot(smat, V[begin:end, :]))
+        
+    return output_img
+
+
+def fast_ica_reconstruction(img, components):
+
+    ica = FastICA(n_components=components)
+    # run ICA on image
+    ica.fit(img)
+    # reconstruct image with independent components
+    image_ica = ica.fit_transform(img)
+    restored_image = ica.inverse_transform(image_ica)
+
+    return restored_image
+
+
+def ipca_reconstruction(img, components, _batch_size=25):
+
+    transformer = IncrementalPCA(n_components=components, batch_size=_batch_size)
+
+    transformed_image = transformer.fit_transform(img) 
+    restored_image = transformer.inverse_transform(transformed_image)
+
+    return restored_image
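
As a rough usage sketch of these helpers (assuming ipfml, scikit-learn, numpy and Pillow are installed and the repository root is the working directory; `zone_block.png` is a placeholder for any 200x200 block image): svd_reconstruction extracts the LAB L channel itself, while the scikit-learn based reconstructions fit directly on the array they receive.

```
import numpy as np
from PIL import Image

from transformation_functions import svd_reconstruction, fast_ica_reconstruction

# svd_reconstruction converts its input to the LAB L channel itself, so an RGB block can be passed directly
rgb_block = np.asarray(Image.open('zone_block.png'))
svd_block = svd_reconstruction(rgb_block, [100, 200])

# the FastICA-based reconstruction fits on the array it is given, so a 2-D (grayscale) block is expected
gray_block = np.asarray(Image.open('zone_block.png').convert('L'))
ica_block = fast_ica_reconstruction(gray_block, 50)

# the results are raw numpy arrays; generate_reconstructed_data.py casts them back to 'uint8' before saving
print(svd_block.shape, ica_block.shape)
```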