Parcourir la source

Merge branch 'release/v0.4.8'

Jérôme BUISINE il y a 3 ans
Parent
commit
5ace7f1247

+ 0 - 53
README.md

@@ -12,31 +12,6 @@ pip install -r requirements.txt
 
 
 ## Project structure
 ## Project structure
 
 
-### Link to your dataset
-
-You have to create a symbolic link to your own database which respects this structure:
-
-- dataset/
-  - Scene1/
-    - zone00/
-    - ...
-    - zone15/
-      - seuilExpe (file which contains threshold samples of zone image perceived by human)
-    - Scene1_00050.png
-    - Scene1_00070.png
-    - ...
-    - Scene1_01180.png
-    - Scene1_01200.png
-  - Scene2/
-    - ...
-  - ...
-
-Create your symbolic link:
-
-```
-ln -s /path/to/your/data dataset
-```
-
 ### Code architecture description
 ### Code architecture description
 
 
 - **modules/\***: contains all modules useful for the whole project (such as configuration variables)
 - **modules/\***: contains all modules useful for the whole project (such as configuration variables)
@@ -127,34 +102,6 @@ All scripts named **prediction/predict_seuil_expe\*.py** are used to simulate mo
 
 
 Once you have simulation done. Checkout your **threshold_map/%MODEL_NAME%/simulation\_curves\_zones\_\*/** folder and use it with help of **display_simulation_curves.py** script.
 Once you have simulation done. Checkout your **threshold_map/%MODEL_NAME%/simulation\_curves\_zones\_\*/** folder and use it with help of **display_simulation_curves.py** script.
 
 
-
-## Use with Calculco (OAR service)
-
-The `oar.example.sh` is an example of script to run in OAR platform.
-
-```
-oarsub -S oar.sh
-```
-
-Check your JOB_ID
-```
-oarstat
-```
-
-**Note:** Do not forget to create symbolic links where necessary to log results
-
-```
-ln -s /where/to/store/you/data data
-ln -s /where/to/store/you/results/ results
-ln -s /where/to/store/you/models_info models_info
-ln -s /where/to/store/you/saved_models saved_models
-```
-
-or simply use this script:
-```
-bash generate_symlinks.sh /where/to/store/you
-```
-
 ## License
 ## License
 
 
 [MIT](https://github.com/prise-3d/Thesis-NoiseDetection-CNN/blob/master/LICENSE)
 [MIT](https://github.com/prise-3d/Thesis-NoiseDetection-CNN/blob/master/LICENSE)

+ 46 - 41
cnn_models.py

@@ -2,11 +2,12 @@
 import sys
 import sys
 
 
 # model imports
 # model imports
-from keras.preprocessing.image import ImageDataGenerator
+# from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential, Model
 from keras.models import Sequential, Model
 from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Conv3D, MaxPooling3D, AveragePooling3D
 from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Conv3D, MaxPooling3D, AveragePooling3D
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
-from keras.applications.vgg19 import VGG19
+from tensorflow.keras import regularizers
+# from keras.applications.vgg19 import VGG19
 from keras import backend as K
 from keras import backend as K
 import tensorflow as tf
 import tensorflow as tf
 
 
@@ -35,35 +36,37 @@ def generate_model_2D(_input_shape):
 
 
     model.add(Flatten())
     model.add(Flatten())
 
 
-    model.add(Dense(140))
-    model.add(Activation('relu'))
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
+    model.add(Activation('relu'))
 
 
-    # model.add(Dense(120))
-    # model.add(Activation('sigmoid'))
-    # model.add(BatchNormalization())
-    # model.add(Dropout(0.5))
+    model.add(Dense(256, 
+        kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
+        bias_regularizer=regularizers.l2(1e-4),
+        activity_regularizer=regularizers.l2(1e-5)))
 
 
-    model.add(Dense(80))
-    model.add(Activation('relu'))
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
-
-    model.add(Dense(40))
     model.add(Activation('relu'))
     model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.5))
 
 
-    model.add(Dense(20))
-    model.add(Activation('relu'))
+    model.add(Dense(64, 
+        kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
+        bias_regularizer=regularizers.l2(1e-4),
+        activity_regularizer=regularizers.l2(1e-5)))
+
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
+    model.add(Activation('relu'))
+
+    model.add(Dense(20, 
+        kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
+        bias_regularizer=regularizers.l2(1e-4),
+        activity_regularizer=regularizers.l2(1e-5)))
 
 
     model.add(Dense(2))
     model.add(Dense(2))
     model.add(Activation('softmax'))
     model.add(Activation('softmax'))
 
 
-    model.compile(loss='categorical_crossentropy',
+    model.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   optimizer='adam',
                   #metrics=['accuracy', metrics.auc])
                   #metrics=['accuracy', metrics.auc])
                   metrics=['accuracy'])
                   metrics=['accuracy'])
@@ -77,45 +80,47 @@ def generate_model_3D(_input_shape):
 
 
     print(_input_shape)
     print(_input_shape)
 
 
-    model.add(Conv3D(200, (1, 3, 3), input_shape=_input_shape))
+    model.add(Conv3D(60, (1, 3, 3), input_shape=_input_shape))
     model.add(Activation('relu'))
     model.add(Activation('relu'))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
 
 
-    model.add(Conv3D(100, (1, 3, 3)))
+    model.add(Conv3D(40, (1, 3, 3)))
     model.add(Activation('relu'))
     model.add(Activation('relu'))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
 
 
-    model.add(Conv3D(40, (1, 3, 3)))
+    model.add(Conv3D(20, (1, 3, 3)))
     model.add(Activation('relu'))
     model.add(Activation('relu'))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
     model.add(MaxPooling3D(pool_size=(1, 2, 2)))
 
 
     model.add(Flatten())
     model.add(Flatten())
 
 
-    model.add(Dense(256))
-    model.add(Activation('relu'))
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
-
-    model.add(Dense(128))
     model.add(Activation('relu'))
     model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.5))
 
 
-    model.add(Dense(64))
-    model.add(Activation('relu'))
+    model.add(Dense(64, 
+        kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
+        bias_regularizer=regularizers.l2(1e-4),
+        activity_regularizer=regularizers.l2(1e-5)))
+        
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
-
-    model.add(Dense(20))
     model.add(Activation('relu'))
     model.add(Activation('relu'))
+
+    model.add(Dense(20, 
+        kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
+        bias_regularizer=regularizers.l2(1e-4),
+        activity_regularizer=regularizers.l2(1e-5)))
+        
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
+    model.add(Activation('relu'))
 
 
     model.add(Dense(2))
     model.add(Dense(2))
     model.add(Activation('sigmoid'))
     model.add(Activation('sigmoid'))
 
 
-    model.compile(loss='categorical_crossentropy',
-                  optimizer='rmsprop',
+    model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
                   #metrics=['accuracy', metrics.auc])
                   #metrics=['accuracy', metrics.auc])
                   metrics=['accuracy'])
                   metrics=['accuracy'])
 
 
@@ -123,7 +128,7 @@ def generate_model_3D(_input_shape):
 
 
 
 
 # using transfer learning (VGG19)
 # using transfer learning (VGG19)
-def generate_model_3D_TL(_input_shape):
+'''def generate_model_3D_TL(_input_shape):
 
 
     # load pre-trained model
     # load pre-trained model
     model = VGG19(weights='imagenet', include_top=False, input_shape=_input_shape)
     model = VGG19(weights='imagenet', include_top=False, input_shape=_input_shape)
@@ -134,7 +139,7 @@ def generate_model_3D_TL(_input_shape):
     for layer in model.layers[:5]:
     for layer in model.layers[:5]:
         layer.trainable = False
         layer.trainable = False
 
 
-    '''predictions_model = Sequential(model)
+    predictions_model = Sequential(model)
 
 
     predictions_model.add(Flatten(model.output))
     predictions_model.add(Flatten(model.output))
 
 
@@ -164,7 +169,7 @@ def generate_model_3D_TL(_input_shape):
     predictions_model.add(Dropout(0.5))
     predictions_model.add(Dropout(0.5))
 
 
     predictions_model.add(Dense(1))
     predictions_model.add(Dense(1))
-    predictions_model.add(Activation('sigmoid'))'''
+    predictions_model.add(Activation('sigmoid'))
 
 
     # adding custom Layers 
     # adding custom Layers 
     x = model.output
     x = model.output
@@ -191,16 +196,16 @@ def generate_model_3D_TL(_input_shape):
                 #   metrics=['accuracy', metrics.auc])
                 #   metrics=['accuracy', metrics.auc])
                   metrics=['accuracy'])
                   metrics=['accuracy'])
 
 
-    return model_final
+    return model_final'''
 
 
 
 
 def get_model(n_channels, _input_shape, _tl=False):
 def get_model(n_channels, _input_shape, _tl=False):
     
     
-    if _tl:
-        if n_channels == 3:
-            return generate_model_3D_TL(_input_shape)
-        else:
-            print("Can't use transfer learning with only 1 channel")
+    # if _tl:
+    #     if n_channels == 3:
+    #         return generate_model_3D_TL(_input_shape)
+    #     else:
+    #         print("Can't use transfer learning with only 1 channel")
 
 
     if n_channels == 1:
     if n_channels == 1:
         return generate_model_2D(_input_shape)
         return generate_model_2D(_input_shape)

+ 8 - 8
custom_config.py

@@ -1,9 +1,5 @@
-from modules.config.cnn_config import *
-
 import os
 import os
 
 
-# store all variables from cnn config
-context_vars = vars()
 
 
 # Custom config used for redefined config variables if necessary
 # Custom config used for redefined config variables if necessary
 
 
@@ -16,8 +12,8 @@ output_zones_learned            = os.path.join(output_data_folder, 'learned_zone
 output_models                   = os.path.join(output_data_folder, 'saved_models')
 output_models                   = os.path.join(output_data_folder, 'saved_models')
 output_results_folder           = os.path.join(output_data_folder, 'results')
 output_results_folder           = os.path.join(output_data_folder, 'results')
 
 
-## noisy_folder                    = 'noisy'
-## not_noisy_folder                = 'notNoisy'
+noisy_folder                    = 'noisy'
+not_noisy_folder                = 'notNoisy'
 backup_model_folder             = os.path.join(output_data_folder, 'models_backup')
 backup_model_folder             = os.path.join(output_data_folder, 'models_backup')
 
 
 # file or extensions
 # file or extensions
@@ -30,7 +26,7 @@ results_filename                = 'results.csv'
 perf_train_header_file          = "model_name;global_train_size;global_test_size;filtered_train_size;filtered_test_size;f1_train;f1_test;recall_train;recall_test;presicion_train;precision_test;acc_train;acc_test;roc_auc_train;roc_auc_test;\n"
 perf_train_header_file          = "model_name;global_train_size;global_test_size;filtered_train_size;filtered_test_size;f1_train;f1_test;recall_train;recall_test;presicion_train;precision_test;acc_train;acc_test;roc_auc_train;roc_auc_test;\n"
 perf_prediction_header_file    = "data;data_size;model_name;accucary;f1;recall;precision;roc;\n"
 perf_prediction_header_file    = "data;data_size;model_name;accucary;f1;recall;precision;roc;\n"
 
 
-features_choices_labels         = ['static', 'svd_reconstruction', 'fast_ica_reconstruction', 'ipca_reconstruction', 'min_diff_filter', 'sobel_based_filter','nl_mean_noise_mask']
+features_choices_labels         = ['static', 'svd_reconstruction', 'svd_reconstruction_dyn', 'fast_ica_reconstruction', 'ipca_reconstruction', 'min_diff_filter', 'sobel_based_filter','nl_mean_noise_mask', 'gini_map']
 
 
 # parameters
 # parameters
 
 
@@ -39,4 +35,8 @@ keras_epochs                    = 30
 ## keras_batch                     = 32
 ## keras_batch                     = 32
 ## val_dataset_size                = 0.2
 ## val_dataset_size                = 0.2
 
 
-keras_img_size                  = (200, 200)
+keras_img_size                  = (200, 200)
+
+# parameters
+scene_image_quality_separator     = '_'
+scene_image_extension             = '.png'

+ 0 - 265
generate/generate_dataset.py

@@ -1,265 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Jun 19 11:47:42 2019
-
-@author: jbuisine
-"""
-
-# main imports
-import sys, os, argparse
-import numpy as np
-import random
-
-# images processing imports
-from PIL import Image
-from ipfml.processing.segmentation import divide_in_blocks
-
-# modules imports
-sys.path.insert(0, '') # trick to enable import of main folder module
-
-import custom_config  as cfg
-from modules.utils import data as dt
-from modules.classes.Transformation import Transformation
-
-# getting configuration information
-zone_folder             = cfg.zone_folder
-learned_folder          = cfg.learned_zones_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indices          = cfg.scenes_indices
-dataset_path            = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-features_choices        = cfg.features_choices_labels
-output_data_folder      = cfg.output_data_folder
-
-generic_output_file_svd = '_random.csv'
-
-def generate_data_model(_filename, _transformations, _scenes_list, _nb_zones = 4, _random=0):
-
-    output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".test"
-
-    if not '/' in output_train_filename:
-        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
-
-    # create path if not exists
-    if not os.path.exists(output_data_folder):
-        os.makedirs(output_data_folder)
-
-    zones_indices = zones
-
-    train_file_data = []
-    test_file_data  = []
-
-    scenes = os.listdir(dataset_path)
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    # go ahead each scenes
-    for folder_scene in _scenes_list:
-
-        scene_path = os.path.join(dataset_path, folder_scene)
-
-        # shuffle list of zones (=> randomly choose zones)
-        # only in random mode
-        if _random:
-            random.shuffle(zones_indices)
-
-         # store zones learned
-        learned_zones_indices = zones_indices[:_nb_zones]
-
-        # write into file
-        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
-
-        if not os.path.exists(folder_learned_path):
-            os.makedirs(folder_learned_path)
-
-        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
-
-        with open(file_learned_path, 'w') as f:
-            for i in learned_zones_indices:
-                f.write(str(i) + ';')
-
-        for id_zone, index_folder in enumerate(zones_indices):
-
-            index_str = str(index_folder)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
-            
-            current_zone_folder = "zone" + index_str
-            zone_path = os.path.join(scene_path, current_zone_folder)
-
-            # custom path for interval of reconstruction and metric
-
-            features_path = []
-
-            for transformation in _transformations:
-                
-                # check if it's a static content and create augmented images if necessary
-                if transformation.getName() == 'static':
-                    
-                    # {sceneName}/zoneXX/static
-                    static_metric_path = os.path.join(zone_path, transformation.getName())
-
-                    # img.png
-                    image_name = transformation.getParam().split('/')[-1]
-
-                    # {sceneName}/zoneXX/static/img
-                    image_prefix_name = image_name.replace('.png', '')
-                    image_folder_path = os.path.join(static_metric_path, image_prefix_name)
-                    
-                    if not os.path.exists(image_folder_path):
-                        os.makedirs(image_folder_path)
-
-                    features_path.append(image_folder_path)
-
-                    # get image path to manage
-                    # {sceneName}/static/img.png
-                    transform_image_path = os.path.join(scene_path, transformation.getName(), image_name) 
-                    static_transform_image = Image.open(transform_image_path)
-
-                    static_transform_image_block = divide_in_blocks(static_transform_image, cfg.sub_image_size)[id_zone]
-
-                    dt.augmented_data_image(static_transform_image_block, image_folder_path, image_prefix_name)
-
-                else:
-                    metric_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
-                    features_path.append(metric_interval_path)
-
-            # as labels are same for each metric
-            for label in os.listdir(features_path[0]):
-
-                label_features_path = []
-
-                for path in features_path:
-                    label_path = os.path.join(path, label)
-                    label_features_path.append(label_path)
-
-                # getting images list for each metric
-                features_images_list = []
-                    
-                for index_metric, label_path in enumerate(label_features_path):
-
-                    if _transformations[index_metric].getName() == 'static':
-                        # by default append nothing..
-                        features_images_list.append([])
-                    else:
-                        images = sorted(os.listdir(label_path))
-                        features_images_list.append(images)
-
-                # construct each line using all images path of each
-                for index_image in range(0, len(features_images_list[0])):
-                    
-                    images_path = []
-
-                    # get information about rotation and flip from first transformation (need to be a not static transformation)
-                    current_post_fix =  features_images_list[0][index_image].split(cfg.post_image_name_separator)[-1]
-
-                    # getting images with same index and hence name for each metric (transformation)
-                    for index_metric in range(0, len(features_path)):
-
-                        # custom behavior for static transformation (need to check specific image)
-                        if _transformations[index_metric].getName() == 'static':
-                            # add static path with selecting correct data augmented image
-                            image_name = _transformations[index_metric].getParam().split('/')[-1].replace('.png', '')
-                            img_path = os.path.join(features_path[index_metric], image_name + cfg.post_image_name_separator + current_post_fix)
-                            images_path.append(img_path)
-                        else:
-                            img_path = features_images_list[index_metric][index_image]
-                            images_path.append(os.path.join(label_features_path[index_metric], img_path))
-
-                    if label == cfg.noisy_folder:
-                        line = '1;'
-                    else:
-                        line = '0;'
-
-                    # compute line information with all images paths
-                    for id_path, img_path in enumerate(images_path):
-                        if id_path < len(images_path) - 1:
-                            line = line + img_path + '::'
-                        else:
-                            line = line + img_path
-                    
-                    line = line + '\n'
-
-                    if id_zone < _nb_zones:
-                        train_file_data.append(line)
-                    else:
-                        test_file_data.append(line)
-
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
-    random.shuffle(train_file_data)
-    random.shuffle(test_file_data)
-
-    for line in train_file_data:
-        train_file.write(line)
-
-    for line in test_file_data:
-        test_file.write(line)
-
-    train_file.close()
-    test_file.close()
-
-def main():
-
-    parser = argparse.ArgumentParser(description="Compute specific dataset for model using of metric")
-
-    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
-    parser.add_argument('--features', type=str, 
-                                     help="list of features choice in order to compute data",
-                                     default='svd_reconstruction, ipca_reconstruction',
-                                     required=True)
-    parser.add_argument('--params', type=str, 
-                                    help="list of specific param for each metric choice (See README.md for further information in 3D mode)", 
-                                    default='100, 200 :: 50, 25',
-                                    required=True)
-    parser.add_argument('--size', type=str, 
-                                  help="Size of input images",
-                                  default="100, 100")
-    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
-    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
-    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
-
-    args = parser.parse_args()
-
-    p_filename = args.output
-    p_features  = list(map(str.strip, args.features.split(',')))
-    p_params   = list(map(str.strip, args.params.split('::')))
-    p_scenes   = args.scenes.split(',')
-    p_size     = args.size # not necessary to split here
-    p_nb_zones = args.nb_zones
-    p_random   = args.random
-
-    # create list of Transformation
-    transformations = []
-
-    for id, feature in enumerate(p_features):
-
-        if feature not in features_choices:
-            raise ValueError("Unknown metric, please select a correct metric : ", features_choices)
-
-        transformations.append(Transformation(feature, p_params[id], p_size))
-
-    if transformations[0].getName() == 'static':
-        raise ValueError("The first transformation in list cannot be static")
-
-    # Update: not use of renderer scenes list
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indices.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # create database using img folder (generate first time only)
-    generate_data_model(p_filename, transformations, scenes_selected, p_nb_zones, p_random)
-
-if __name__== "__main__":
-    main()

+ 2 - 20
generate/generate_dataset_sequence_file.py

@@ -18,26 +18,8 @@ from ipfml.processing.segmentation import divide_in_blocks
 # modules imports
 # modules imports
 sys.path.insert(0, '') # trick to enable import of main folder module
 sys.path.insert(0, '') # trick to enable import of main folder module
 
 
-import custom_config  as cfg
-from modules.utils import data as dt
-from modules.classes.Transformation import Transformation
-
-# getting configuration information
-zone_folder             = cfg.zone_folder
-learned_folder          = cfg.learned_zones_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indices          = cfg.scenes_indices
-dataset_path            = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-features_choices        = cfg.features_choices_labels
-output_data_folder      = cfg.output_datasets
-
-generic_output_file_svd = '_random.csv'
+import config  as cfg
+from transformations import Transformation
 
 
 def generate_data_model(_filename, _transformations, _dataset_folder, _selected_zones, _sequence):
 def generate_data_model(_filename, _transformations, _dataset_folder, _selected_zones, _sequence):
 
 

+ 0 - 232
generate/generate_reconstructed_data.py

@@ -1,232 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Jun 19 11:47:42 2019
-
-@author: jbuisine
-"""
-
-# main imports
-import sys, os, argparse
-import numpy as np
-
-# images processing imports
-from PIL import Image
-from ipfml.processing.segmentation import divide_in_blocks
-
-# modules imports
-sys.path.insert(0, '') # trick to enable import of main folder module
-
-import custom_config as cfg
-from modules.utils.data import get_scene_image_quality
-from modules.classes.Transformation import Transformation
-
-# getting configuration information
-zone_folder             = cfg.zone_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indices          = cfg.scenes_indices
-path                    = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-features_choices        = cfg.features_choices_labels
-output_data_folder      = cfg.output_data_folder
-
-generic_output_file_svd = '_random.csv'
-
-def generate_data(transformation, _scenes, _replace):
-    """
-    @brief Method which generates all .csv files from scenes
-    @return nothing
-    """
-
-    scenes = os.listdir(path)
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    # go ahead each scenes
-    for id_scene, folder_scene in enumerate(scenes):
-
-        if folder_scene in _scenes:
-            print(folder_scene)
-            scene_path = os.path.join(path, folder_scene)
-
-            # construct each zones folder name
-            zones_folder = []
-            features_folder = []
-            zones_threshold = []
-
-            # get zones list info
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-
-                current_zone = "zone"+index_str
-                zones_folder.append(current_zone)
-                zone_path = os.path.join(scene_path, current_zone)
-
-                with open(os.path.join(zone_path, cfg.seuil_expe_filename)) as f:
-                    zones_threshold.append(int(f.readline()))
-
-                # custom path for feature
-                feature_path = os.path.join(zone_path, transformation.getName())
-
-                if not os.path.exists(feature_path):
-                    os.makedirs(feature_path)
-
-                # custom path for interval of reconstruction and feature
-                feature_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
-                features_folder.append(feature_interval_path)
-
-                if not os.path.exists(feature_interval_path):
-                    os.makedirs(feature_interval_path)
-
-                # create for each zone the labels folder
-                labels = [cfg.not_noisy_folder, cfg.noisy_folder]
-
-                for label in labels:
-                    label_folder = os.path.join(feature_interval_path, label)
-
-                    if not os.path.exists(label_folder):
-                        os.makedirs(label_folder)
-
-            # get all images of folder
-            scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
-            number_scene_image = len(scene_images)
-
-            # for each images
-            for id_img, img_path in enumerate(scene_images):
-
-                current_img = Image.open(img_path)
-                img_blocks = divide_in_blocks(current_img, cfg.sub_image_size)
-
-                current_quality_index = int(get_scene_image_quality(img_path))
-
-                for id_block, block in enumerate(img_blocks):
-
-                    ##########################
-                    # Image computation part #
-                    ##########################
-
-                    label_path = features_folder[id_block]
-
-                    # get label folder for block
-                    if current_quality_index > zones_threshold[id_block]:
-                        label_path = os.path.join(label_path, cfg.not_noisy_folder)
-                    else:
-                        label_path = os.path.join(label_path, cfg.noisy_folder)
-
-                    # check if necessary to compute or not images
-                    # Data augmentation!
-                    rotations = [0, 90, 180, 270]
-
-                    #img_flip_labels = ['original', 'horizontal', 'vertical', 'both']
-                    img_flip_labels = ['original', 'horizontal']
-
-                    output_images_path = []
-                    check_path_exists = []
-                    # rotate and flip image to increase dataset size
-                    for id, flip_label in enumerate(img_flip_labels):
-                        for rotation in rotations:
-                            output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block] + cfg.post_image_name_separator
-                            output_reconstructed_filename = output_reconstructed_filename + flip_label + '_' + str(rotation) + '.png'
-                            output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
-
-                            if os.path.exists(output_reconstructed_path):
-                                check_path_exists.append(True)
-                            else:
-                                check_path_exists.append(False)
-
-                            output_images_path.append(output_reconstructed_path)
-
-                    # compute only if not exists or necessary to replace
-                    if _replace or not np.array(check_path_exists).all():
-                        # compute image
-                        # pass block to grey level
-                        output_block = transformation.getTransformedImage(block)
-                        output_block = np.array(output_block, 'uint8')
-                        
-                        # current output image
-                        output_block_img = Image.fromarray(output_block)
-
-                        horizontal_img = output_block_img.transpose(Image.FLIP_LEFT_RIGHT)
-                        #vertical_img = output_block_img.transpose(Image.FLIP_TOP_BOTTOM)
-                        #both_img = output_block_img.transpose(Image.TRANSPOSE)
-
-                        #flip_images = [output_block_img, horizontal_img, vertical_img, both_img]
-                        flip_images = [output_block_img, horizontal_img]
-
-                        # rotate and flip image to increase dataset size
-                        counter_index = 0 # get current path index
-                        for id, flip in enumerate(flip_images):
-                            for rotation in rotations:
-
-                                if _replace or not check_path_exists[counter_index]:
-                                    rotated_output_img = flip.rotate(rotation)
-                                    rotated_output_img.save(output_images_path[counter_index])
-
-                                counter_index +=1
-
-                print(transformation.getName() + "_" + folder_scene + " - " + "{0:.2f}".format(((id_img + 1) / number_scene_image)* 100.) + "%")
-                sys.stdout.write("\033[F")
-
-            print('\n')
-
-    print("%s_%s : end of data generation\n" % (transformation.getName(), transformation.getParam()))
-
-
-def main():
-
-    parser = argparse.ArgumentParser(description="Compute and prepare data of feature of all scenes using specific interval if necessary")
-
-    parser.add_argument('--features', type=str, 
-                                     help="list of features choice in order to compute data",
-                                     default='svd_reconstruction, ipca_reconstruction',
-                                     required=True)
-    parser.add_argument('--params', type=str, 
-                                    help="list of specific param for each feature choice (See README.md for further information in 3D mode)", 
-                                    default='100, 200 :: 50, 25',
-                                    required=True)
-    parser.add_argument('--size', type=str, 
-                                help="specific size of image", 
-                                default='100, 100',
-                                required=True)
-    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
-    parser.add_argument('--replace', type=int, help='replace previous picutre', default=1)
-
-    args = parser.parse_args()
-
-    p_features  = list(map(str.strip, args.features.split(',')))
-    p_params    = list(map(str.strip, args.params.split('::')))
-    p_size      = args.size
-    p_scenes    = args.scenes.split(',')
-    p_replace   = bool(args.replace)
-
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indices.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # list of transformations
-    transformations = []
-
-    for id, feature in enumerate(p_features):
-
-        if feature not in features_choices or feature == 'static':
-            raise ValueError("Unknown feature, please select a correct feature (`static` excluded) : ", features_choices)
-
-        transformations.append(Transformation(feature, p_params[id], p_size))
-
-    print("Scenes used", scenes_selected)
-    # generate all or specific feature data
-    for transformation in transformations:
-        generate_data(transformation, scenes_selected, p_replace)
-
-if __name__== "__main__":
-    main()

+ 13 - 12
generate/generate_reconstructed_folder.py

@@ -13,20 +13,21 @@ import numpy as np
 # images processing imports
 # images processing imports
 from PIL import Image
 from PIL import Image
 from ipfml.processing.segmentation import divide_in_blocks
 from ipfml.processing.segmentation import divide_in_blocks
+from transformations import Transformation
 
 
 # modules imports
 # modules imports
 sys.path.insert(0, '') # trick to enable import of main folder module
 sys.path.insert(0, '') # trick to enable import of main folder module
 
 
-import custom_config as cfg
-from modules.utils.data import get_scene_image_quality
-from modules.classes.Transformation import Transformation
+import config as cfg
+zones = np.arange(16)
 
 
-# getting configuration information
-zone_folder             = cfg.zone_folder
+def get_scene_image_quality(img_path):
 
 
-# define all scenes values
-zones                   = cfg.zones_indices
-features_choices        = cfg.features_choices_labels
+    # if path getting last element (image name) and extract quality
+    img_postfix = img_path.split('/')[-1].split(cfg.scene_image_quality_separator)[-1]
+    img_quality = img_postfix.replace(cfg.scene_image_extension, '')
+
+    return int(img_quality)
 
 
 '''
 '''
 Display progress information as progress bar
 Display progress information as progress bar
@@ -195,8 +196,8 @@ def generate_data(transformation, _dataset_path, _output, _human_thresholds, _re
                         filename_parts = filename.split('_')
                         filename_parts = filename.split('_')
 
 
                         # get samples : `00XXX`
                         # get samples : `00XXX`
-                        n_samples = filename_parts[2]
-                        del filename_parts[2]
+                        n_samples = filename_parts[-1]
+                        del filename_parts[-1]
 
 
                         # `p3d_XXXXXX`
                         # `p3d_XXXXXX`
                         output_reconstructed = '_'.join(filename_parts)
                         output_reconstructed = '_'.join(filename_parts)
@@ -259,8 +260,8 @@ def main():
 
 
     for id, feature in enumerate(p_features):
     for id, feature in enumerate(p_features):
 
 
-        if feature not in features_choices or feature == 'static':
-            raise ValueError("Unknown feature {0}, please select a correct feature (`static` excluded) : {1}".format(feature, features_choices))
+        if feature not in cfg.features_choices_labels or feature == 'static':
+            raise ValueError("Unknown feature {0}, please select a correct feature (`static` excluded) : {1}".format(feature, cfg.features_choices_labels))
         
         
         transformations.append(Transformation(feature, p_params[id], p_size))
         transformations.append(Transformation(feature, p_params[id], p_size))
 
 

+ 278 - 0
generate/transformations.py

@@ -0,0 +1,278 @@
# main imports
import os
import random
import numpy as np

# image processing imports
from ipfml.processing import transform, compression
from ipfml.processing import reconstruction
from ipfml.filters import convolution, kernels
from ipfml import utils
import cv2
from skimage.restoration import denoise_nl_means, estimate_sigma

from PIL import Image
+
+
def remove_pixel(img, limit):
    """Zero out every pixel of `img` that is strictly greater than `limit`.

    Pixels whose value is <= `limit` are kept unchanged. Returns a new
    float array (the input is never modified).

    Vectorized with a boolean mask instead of the original per-pixel
    Python double loop (one interpreter iteration per pixel); also works
    for arrays of any shape, not only 2D.
    """
    img = np.asarray(img)
    output = np.zeros(img.shape)

    # keep only the pixels at or below the threshold
    mask = img <= limit
    output[mask] = img[mask]

    return output
+
+
def get_random_value(distribution):
    """Sample an index from a discrete probability distribution.

    `distribution` is a sequence of probabilities (expected to sum to 1).
    A uniform draw in [0, 1] is compared against the running cumulative
    sum; the first index whose cumulative probability reaches the draw is
    returned. Falls back to the last index if rounding keeps the total
    below the draw.
    """
    threshold = random.uniform(0, 1)
    cumulative = 0.

    for index, probability in enumerate(distribution):

        cumulative += probability

        if cumulative >= threshold:
            return index

    # numerical safety net when probabilities do not quite sum to 1
    return len(distribution) - 1
+
+
def distribution_from_data(data):
    """Build a discrete probability distribution from observed samples.

    Returns a numpy array of relative frequencies, one entry per distinct
    value of `data`, ordered by ascending value.

    Uses numpy.unique(return_counts=True) instead of the original
    `data.count(x) for x in set(data)`: a single O(n log n) pass rather
    than one O(n) count per distinct value, and the ascending ordering is
    deterministic (set iteration order was not guaranteed).
    """
    _, occurences = np.unique(np.asarray(list(data)), return_counts=True)

    # normalize so the frequencies sum to 1
    return occurences / occurences.sum()
+
+
def fill_image_with_rand_value(img, func, value_to_replace):
    """Return a float copy of `img` where every pixel equal to
    `value_to_replace` is replaced by a fresh call to `func()`.

    All other pixels are copied unchanged. `func` is invoked exactly once
    per replaced pixel, in row-major order — the same order as the
    original per-pixel loop, so stateful generators behave identically.

    Vectorized with a boolean mask instead of the original Python double
    loop; also accepts arrays of any shape, not only 2D.
    """
    img = np.asarray(img)
    output = np.array(img, dtype=float)

    mask = img == value_to_replace
    # boolean-mask assignment fills in row-major (C) order
    output[mask] = [func() for _ in range(int(mask.sum()))]

    return output
+
+def _compute_relative_error(ref_sv, k_sv):
+    ref = np.sqrt(np.sum(np.square(ref_sv)))
+    k = np.sqrt(np.sum(np.square(k_sv)))
+
+    return k / ref
+
def _find_n_components(block, e=0.1):
    """Find how many leading singular values must be removed before the
    remaining relative error drops under the tolerance `e`.

    Returns a tuple ``(k_components, errors)`` where `errors` lists the
    relative error at every truncation level and `k_components` is the
    first level (1-based) whose error is below `e`, or None if the
    tolerance is never reached.
    """
    singular_values = transform.get_LAB_L_SVD_s(block)

    errors = []
    k_components = None

    for index in range(len(singular_values)):

        # relative energy left in the tail of the spectrum
        error = _compute_relative_error(singular_values, singular_values[index:])
        errors.append(error)

        # remember only the first level satisfying the tolerance
        if k_components is None and error < e:
            k_components = index + 1

    return (k_components, errors)
+
# Transformation class to store transformation method of image and get usefull information
class Transformation():
    """Holds one image transformation (feature) and its configuration.

    Attributes:
        transformation: name of the transformation method to apply
        param: method-specific parameters, usually a comma-separated string
        size: expected output size as a 'width, height' string
    """

    def __init__(self, _transformation, _param, _size):
        self.transformation = _transformation
        self.param = _param
        self.size = _size

    def getTransformedImage(self, img):
        """Apply the configured transformation to `img` and return the
        result as a numpy array.

        The original implementation contained the 'svd_reconstruction'
        branch twice, recomputing the same SVD reconstruction; the
        duplicate has been removed and the mutually-exclusive checks are
        now an if/elif chain.

        NOTE(review): 'min_diff_filter' is handled by
        getTransformationPath() but has no branch here, so requesting it
        raises UnboundLocalError on `data` — confirm whether that feature
        is still expected to work.
        """

        if self.transformation == 'svd_reconstruction':
            begin, end = list(map(int, self.param.split(',')))
            h, w = list(map(int, self.size.split(',')))
            img_reconstructed = reconstruction.svd(img, [begin, end])
            data_array = np.array(img_reconstructed, 'uint8')

            img_array = Image.fromarray(data_array)
            img_array.thumbnail((h, w))

            data = np.array(img_array)

        elif self.transformation == 'ipca_reconstruction':
            n_components, batch_size = list(map(int, self.param.split(',')))
            h, w = list(map(int, self.size.split(',')))
            img_reconstructed = reconstruction.ipca(img, n_components, batch_size)
            data_array = np.array(img_reconstructed, 'uint8')

            img_array = Image.fromarray(data_array)
            img_array.thumbnail((h, w))

            data = np.array(img_array)

        elif self.transformation == 'fast_ica_reconstruction':
            n_components = self.param
            h, w = list(map(int, self.size.split(',')))
            img_reconstructed = reconstruction.fast_ica(img, n_components)
            data_array = np.array(img_reconstructed, 'uint8')

            img_array = Image.fromarray(data_array)
            img_array.thumbnail((h, w))

            data = np.array(img_array)

        elif self.transformation == 'gini_map':
            # kernel size
            k_w, k_h = list(map(int, self.param.split(',')))
            h, w = list(map(int, self.size.split(',')))

            lab_img = transform.get_LAB_L(img)
            img_mask = convolution.convolution2D(lab_img, kernels.gini, (k_w, k_h))

            # renormalize data into displayable 8-bit range
            data_array = np.array(img_mask * 255, 'uint8')

            img_array = Image.fromarray(data_array)
            img_array.thumbnail((h, w))

            data = np.array(img_array)

        elif self.transformation == 'sobel_based_filter':
            k_size, p_limit = list(map(int, self.param.split(',')))
            h, w = list(map(int, self.size.split(',')))

            lab_img = transform.get_LAB_L(img)

            weight, height = lab_img.shape

            sobelx = cv2.Sobel(lab_img, cv2.CV_64F, 1, 0, ksize=k_size)
            sobely = cv2.Sobel(lab_img, cv2.CV_64F, 0, 1,ksize=k_size)

            sobel_mag = np.array(np.hypot(sobelx, sobely), 'uint8')  # magnitude
            sobel_mag_limit = remove_pixel(sobel_mag, p_limit)

            # use distribution value of pixel to fill `0` values
            sobel_mag_limit_without_0 = [x for x in sobel_mag_limit.reshape((weight*height)) if x != 0]
            distribution = distribution_from_data(sobel_mag_limit_without_0)
            min_value = int(min(sobel_mag_limit_without_0))
            l = lambda: get_random_value(distribution) + min_value
            img_reconstructed = fill_image_with_rand_value(sobel_mag_limit, l, 0)

            img_reconstructed_norm = utils.normalize_2D_arr(img_reconstructed)
            img_reconstructed_norm = np.array(img_reconstructed_norm*255, 'uint8')
            sobel_reconstructed = Image.fromarray(img_reconstructed_norm)
            sobel_reconstructed.thumbnail((h, w))

            data = np.array(sobel_reconstructed)

        elif self.transformation == 'nl_mean_noise_mask':
            patch_size, patch_distance = list(map(int, self.param.split(',')))
            h, w = list(map(int, self.size.split(',')))

            img = np.array(img)
            sigma_est = np.mean(estimate_sigma(img, multichannel=True))

            patch_kw = dict(patch_size=patch_size,      # 5x5 patches
                            patch_distance=patch_distance,  # 13x13 search area
                            multichannel=True)

            # slow algorithm (fast_mode disabled for quality)
            denoise = denoise_nl_means(img, h=0.8 * sigma_est, sigma=sigma_est,
                                    fast_mode=False,
                                    **patch_kw)

            # the noise mask is the absolute difference with the denoised image
            denoise = np.array(denoise, 'uint8')
            noise_mask = np.abs(denoise - img)

            data_array = np.array(noise_mask, 'uint8')

            img_array = Image.fromarray(data_array)
            img_array.thumbnail((h, w))

            data = np.array(img_array)

        elif self.transformation == 'static':
            # static content, we keep input as it is
            data = img

        return data

    def getTransformationPath(self):
        """Build the output sub-folder path encoding the transformation
        name, its parameters and the output size (or, for 'static', the
        reference image name stored in `param`)."""

        path = self.transformation

        if self.transformation == 'svd_reconstruction':
            begin, end = list(map(int, self.param.split(',')))
            w, h = list(map(int, self.size.split(',')))
            path = os.path.join(path, str(begin) + '_' + str(end) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'gini_map':
            k_w, k_h = list(map(int, self.param.split(',')))
            w, h = list(map(int, self.size.split(',')))
            path = os.path.join(path, str(k_w) + '_' + str(k_h) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'ipca_reconstruction':
            n_components, batch_size = list(map(int, self.param.split(',')))
            w, h = list(map(int, self.size.split(',')))
            path = os.path.join(path, 'N' + str(n_components) + '_' + str(batch_size) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'fast_ica_reconstruction':
            n_components = self.param
            w, h = list(map(int, self.size.split(',')))
            path = os.path.join(path, 'N' + str(n_components) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'min_diff_filter':
            w_size, h_size, stride = list(map(int, self.param.split(',')))
            w, h = list(map(int, self.size.split(',')))
            path = os.path.join(path, 'W_' + str(w_size) + '_' + str(h_size) + '_Stride_' + str(stride) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'sobel_based_filter':
            k_size, p_limit = list(map(int, self.param.split(',')))
            # NOTE(review): size is unpacked as (h, w) here but (w, h) in
            # the branches above; kept as-is to preserve existing folder names
            h, w = list(map(int, self.size.split(',')))
            path = os.path.join(path, 'K_' + str(k_size) + '_L' + str(p_limit) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'nl_mean_noise_mask':
            patch_size, patch_distance = list(map(int, self.param.split(',')))
            h, w = list(map(int, self.size.split(',')))
            path = os.path.join(path, 'S' + str(patch_size) + '_D' + str(patch_distance) + '_S_' + str(w) + '_' + str(h))

        elif self.transformation == 'static':
            # param contains image name to find for each scene
            path = self.param

        return path

    def getName(self):
        """Return the transformation method name."""
        return self.transformation

    def getParam(self):
        """Return the raw parameter string."""
        return self.param

    def __str__(self):
        return self.transformation + ' transformation with parameter : ' + self.param

+ 0 - 1
modules

@@ -1 +0,0 @@
-Subproject commit 270de3a969ff3121e68f435cc6a3b570ba5b9d69

+ 0 - 15
oar.example.sh

@@ -1,15 +0,0 @@
-#!/bin/sh
-
-#OAR --array-param-file params.txt
-#OAR -l /nodes=1,walltime=6:00:00
-#OAR -p host="orval02"
-#OAR -t besteffort
-#OAR --notify mail:jerome.buisine@univ-littoral.fr
-#OAR -O /nfs/home/lisic/jbuisine/projects/launchers/logs/Thesis-NoiseDetection-CNN.%jobid%.out
-#OAR -E /nfs/home/lisic/jbuisine/projects/launchers/logs/Thesis-NoiseDetection-CNN.%jobid%.err
-
-# Activiate venv used by python
-. ~/opt/venvs/thesis-venv/bin/activate
-
-# run command
-python ~/projects/Thesis-NoiseDetection-CNN/generate/generate_reconstructed_data.py $@

+ 1 - 5
prediction/estimate_thresholds_lstm.py

@@ -91,11 +91,7 @@ def main():
     # 2. load model and compile it
     # 2. load model and compile it
 
 
     # TODO : check kind of model
     # TODO : check kind of model
-    model = joblib.load(p_model)
-    model.compile(loss='binary_crossentropy',
-                  optimizer='rmsprop',
-                  metrics=['accuracy'])
-    # model = load_model(p_model)
+    model = load_model(p_model)
     # model.compile(loss='binary_crossentropy',
     # model.compile(loss='binary_crossentropy',
     #               optimizer='rmsprop',
     #               optimizer='rmsprop',
     #               metrics=['accuracy'])
     #               metrics=['accuracy'])

+ 26 - 15
train_lstm_weighted.py

@@ -5,6 +5,7 @@ import pandas as pd
 import os
 import os
 import ctypes
 import ctypes
 from PIL import Image
 from PIL import Image
+import cv2
 
 
 from keras import backend as K
 from keras import backend as K
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
@@ -50,7 +51,7 @@ def write_progress(progress):
     sys.stdout.write("\033[F")
     sys.stdout.write("\033[F")
 
 
 
 
-def build_input(df, seq_norm):
+def build_input(df, seq_norm, p_chanels):
     """Convert dataframe to numpy array input with timesteps as float array
     """Convert dataframe to numpy array input with timesteps as float array
     
     
     Arguments:
     Arguments:
@@ -76,10 +77,15 @@ def build_input(df, seq_norm):
             seq_elems = []
             seq_elems = []
 
 
             # for each element in sequence data
             # for each element in sequence data
-            for img_path in column:
-                img = Image.open(img_path)
+            for i, img_path in enumerate(column):
+
                 # seq_elems.append(np.array(img).flatten())
                 # seq_elems.append(np.array(img).flatten())
-                seq_elems.append(np.array(img) / 255.)
+                if p_chanels[i] > 1:
+                    img = cv2.imread(img_path)
+                else:
+                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
+                
+                seq_elems.append(np.array(img, 'float32') / 255.)
 
 
             #seq_arr.append(np.array(seq_elems).flatten())
             #seq_arr.append(np.array(seq_elems).flatten())
             seq_arr.append(np.array(seq_elems))
             seq_arr.append(np.array(seq_elems))
@@ -154,9 +160,11 @@ def create_model(_input_shape):
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
 
 
     model.add(Flatten())
     model.add(Flatten())
-    model.add(Dense(512, activation='sigmoid'))
+    model.add(Dense(512, activation='relu'))
+    model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
-    model.add(Dense(128, activation='sigmoid'))
+    model.add(Dense(128, activation='relu'))
+    model.add(BatchNormalization())
     model.add(Dropout(0.5))
     model.add(Dropout(0.5))
     model.add(Dense(1, activation='sigmoid'))
     model.add(Dense(1, activation='sigmoid'))
     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
@@ -176,6 +184,7 @@ def main():
     parser.add_argument('--train', type=str, help='input train dataset', required=True)
     parser.add_argument('--train', type=str, help='input train dataset', required=True)
     parser.add_argument('--test', type=str, help='input test dataset', required=True)
     parser.add_argument('--test', type=str, help='input test dataset', required=True)
     parser.add_argument('--output', type=str, help='output model name', required=True)
     parser.add_argument('--output', type=str, help='output model name', required=True)
+    parser.add_argument('--chanels', type=str, help="given number of ordered chanels (example: '1,3,3') for each element of window", required=True)
     parser.add_argument('--epochs', type=int, help='number of expected epochs', default=30)
     parser.add_argument('--epochs', type=int, help='number of expected epochs', default=30)
     parser.add_argument('--batch_size', type=int, help='expected batch size for training model', default=64)
     parser.add_argument('--batch_size', type=int, help='expected batch size for training model', default=64)
     parser.add_argument('--seq_norm', type=int, help='normalization sequence by features', choices=[0, 1], default=0)
     parser.add_argument('--seq_norm', type=int, help='normalization sequence by features', choices=[0, 1], default=0)
@@ -185,6 +194,7 @@ def main():
     p_train        = args.train
     p_train        = args.train
     p_test         = args.test
     p_test         = args.test
     p_output       = args.output
     p_output       = args.output
+    p_chanels     = list(map(int, args.chanels.split(',')))
     p_epochs       = args.epochs
     p_epochs       = args.epochs
     p_batch_size   = args.batch_size
     p_batch_size   = args.batch_size
     p_seq_norm     = bool(args.seq_norm)
     p_seq_norm     = bool(args.seq_norm)
@@ -234,11 +244,11 @@ def main():
 
 
     # split dataset into X_train, y_train, X_test, y_test
     # split dataset into X_train, y_train, X_test, y_test
     X_train_all = final_df_train.loc[:, 1:].apply(lambda x: x.astype(str).str.split('::'))
     X_train_all = final_df_train.loc[:, 1:].apply(lambda x: x.astype(str).str.split('::'))
-    X_train_all = build_input(X_train_all, p_seq_norm)
+    X_train_all = build_input(X_train_all, p_seq_norm, p_chanels)
     y_train_all = final_df_train.loc[:, 0].astype('int')
     y_train_all = final_df_train.loc[:, 0].astype('int')
 
 
     X_test = final_df_test.loc[:, 1:].apply(lambda x: x.astype(str).str.split('::'))
     X_test = final_df_test.loc[:, 1:].apply(lambda x: x.astype(str).str.split('::'))
-    X_test = build_input(X_test, p_seq_norm)
+    X_test = build_input(X_test, p_seq_norm, p_chanels)
     y_test = final_df_test.loc[:, 0].astype('int')
     y_test = final_df_test.loc[:, 0].astype('int')
 
 
     input_shape = (X_train_all.shape[1], X_train_all.shape[2], X_train_all.shape[3], X_train_all.shape[4])
     input_shape = (X_train_all.shape[1], X_train_all.shape[2], X_train_all.shape[3], X_train_all.shape[4])
@@ -305,12 +315,13 @@ def main():
     # train_score, train_acc = model.evaluate(X_train, y_train, batch_size=1)
     # train_score, train_acc = model.evaluate(X_train, y_train, batch_size=1)
 
 
     # print(train_acc)
     # print(train_acc)
-    y_train_predict = model.predict_classes(X_train)
-    y_val_predict = model.predict_classes(X_val)
-    y_test_predict = model.predict_classes(X_test)
+    y_train_predict = model.predict(X_train, batch_size=1, verbose=1)
+    y_val_predict = model.predict(X_val, batch_size=1, verbose=1)
+    y_test_predict = model.predict(X_test, batch_size=1, verbose=1)
 
 
-    print(y_train_predict)
-    print(y_test_predict)
+    y_train_predict = [ 1 if l > 0.5 else 0 for l in y_train_predict ]
+    y_val_predict = [ 1 if l > 0.5 else 0 for l in y_val_predict ]
+    y_test_predict = [ 1 if l > 0.5 else 0 for l in y_test_predict ]
 
 
     auc_train = roc_auc_score(y_train, y_train_predict)
     auc_train = roc_auc_score(y_train, y_train_predict)
     auc_val = roc_auc_score(y_val, y_val_predict)
     auc_val = roc_auc_score(y_val, y_val_predict)
@@ -338,11 +349,11 @@ def main():
     model_history = os.path.join(cfg.output_results_folder, p_output + '.png')
     model_history = os.path.join(cfg.output_results_folder, p_output + '.png')
     plt.savefig(model_history)
     plt.savefig(model_history)
 
 
-    # save model using joblib
+    # save model using keras API
     if not os.path.exists(cfg.output_models):
     if not os.path.exists(cfg.output_models):
         os.makedirs(cfg.output_models)
         os.makedirs(cfg.output_models)
 
 
-    dump(model, os.path.join(cfg.output_models, p_output + '.joblib'))
+    model.save(os.path.join(cfg.output_models, p_output + '.h5'))
 
 
     # save model results
     # save model results
     if not os.path.exists(cfg.output_results_folder):
     if not os.path.exists(cfg.output_results_folder):

+ 30 - 44
train_model.py

@@ -56,7 +56,7 @@ def main():
     parser.add_argument('--tl', type=int, help='use or not of transfer learning (`VGG network`)', default=0, choices=[0, 1])
     parser.add_argument('--tl', type=int, help='use or not of transfer learning (`VGG network`)', default=0, choices=[0, 1])
     parser.add_argument('--batch_size', type=int, help='batch size used as model input', default=64)
     parser.add_argument('--batch_size', type=int, help='batch size used as model input', default=64)
     parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=30)
     parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=30)
-    parser.add_argument('--chanels', type=int, help="given number of chanels if necessary", default=0)
+    parser.add_argument('--chanels', type=str, help="given number of ordered chanels for each input images (example: '1,3,3')", required=True)
     parser.add_argument('--size', type=str, help="Size of input images", default="100, 100")
     parser.add_argument('--size', type=str, help="Size of input images", default="100, 100")
     parser.add_argument('--val_size', type=float, help='percent of validation data during training process', default=0.3)
     parser.add_argument('--val_size', type=float, help='percent of validation data during training process', default=0.3)
 
 
@@ -68,7 +68,7 @@ def main():
     p_tl          = args.tl
     p_tl          = args.tl
     p_batch_size  = args.batch_size
     p_batch_size  = args.batch_size
     p_epochs      = args.epochs
     p_epochs      = args.epochs
-    p_chanels     = args.chanels
+    p_chanels     = list(map(int, args.chanels.split(',')))
     p_size        = args.size.split(',')
     p_size        = args.size.split(',')
     p_val_size    = args.val_size
     p_val_size    = args.val_size
 
 
@@ -94,10 +94,7 @@ def main():
     print("--Reading all images data...")
     print("--Reading all images data...")
 
 
     # getting number of chanel
     # getting number of chanel
-    if p_chanels == 0:
-        n_chanels = len(dataset_train[1][1].split('::'))
-    else:
-        n_chanels = p_chanels
+    n_chanels = sum(p_chanels)
 
 
     print("-- Number of chanels : ", n_chanels)
     print("-- Number of chanels : ", n_chanels)
     img_width, img_height = [ int(s) for s in p_size ]
     img_width, img_height = [ int(s) for s in p_size ]
@@ -145,44 +142,30 @@ def main():
 
 
     final_df_train = dataset_train
     final_df_train = dataset_train
     final_df_test = dataset_test
     final_df_test = dataset_test
-    
-    def load_multiple_greyscale(x):
-        # update progress
-        global n_counter
-        n_counter += 1
-        write_progress(n_counter / float(total_samples))
-        return [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')]
 
 
-    def load_greyscale(x):
+    def load_images(x):
         # update progress
         # update progress
         global n_counter
         global n_counter
         n_counter += 1
         n_counter += 1
         write_progress(n_counter / float(total_samples))
         write_progress(n_counter / float(total_samples))
-        return cv2.imread(x, cv2.IMREAD_GRAYSCALE)
 
 
-    def load_rgb(x):
-        # update progress
-        global n_counter
-        n_counter += 1
-        write_progress(n_counter / float(total_samples))
-        return cv2.imread(x)
+        images = []
+        for i, path in enumerate(x.split('::')):
+            if p_chanels[i] > 1:
+                img = cv2.imread(path)
+            else:
+                img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
+            images.append(img)
+
+        return images
 
 
 
 
     print('---- Loading dataset.... ----')
     print('---- Loading dataset.... ----')
     print('-----------------------------\n')
     print('-----------------------------\n')
 
 
     # check if specific number of chanels is used
     # check if specific number of chanels is used
-    if p_chanels == 0:
-        # `::` is the separator used for getting each img path
-        if n_chanels > 1:
-            final_df_train[1] = final_df_train[1].apply(lambda x: load_multiple_greyscale(x))
-            final_df_test[1] = final_df_test[1].apply(lambda x: load_multiple_greyscale(x))
-        else:
-            final_df_train[1] = final_df_train[1].apply(lambda x: load_greyscale(x))
-            final_df_test[1] = final_df_test[1].apply(lambda x: load_greyscale(x))
-    else:
-        final_df_train[1] = final_df_train[1].apply(lambda x: load_rgb(x))
-        final_df_test[1] = final_df_test[1].apply(lambda x: load_rgb(x))
+    final_df_train[1] = final_df_train[1].apply(lambda x: load_images(x))
+    final_df_test[1] = final_df_test[1].apply(lambda x: load_images(x))
 
 
     # reshape array data
     # reshape array data
     final_df_train[1] = final_df_train[1].apply(lambda x: np.array(x).reshape(input_shape))
     final_df_train[1] = final_df_train[1].apply(lambda x: np.array(x).reshape(input_shape))
@@ -238,7 +221,8 @@ def main():
 
 
     if len(backups) > 0:
     if len(backups) > 0:
         last_backup_file = backups[-1]
         last_backup_file = backups[-1]
-        model = load_model(last_backup_file)
+        last_backup_file_path = os.path.join(model_backup_folder, last_backup_file)
+        model = load_model(last_backup_file_path)
 
 
         # get initial epoch
         # get initial epoch
         initial_epoch = int(last_backup_file.split('_')[-1].replace('.h5', ''))
         initial_epoch = int(last_backup_file.split('_')[-1].replace('.h5', ''))
@@ -254,22 +238,22 @@ def main():
     # prepare train and validation dataset
     # prepare train and validation dataset
     X_train, X_val, y_train, y_val = train_test_split(x_data_train, y_dataset_train, test_size=p_val_size, shuffle=False)
     X_train, X_val, y_train, y_val = train_test_split(x_data_train, y_dataset_train, test_size=p_val_size, shuffle=False)
 
 
-    y_train = to_categorical(y_train)
-    y_val = to_categorical(y_val)
-    y_test = to_categorical(y_dataset_test)
+    y_train_cat = to_categorical(y_train)
+    y_val_cat = to_categorical(y_val)
+    y_test_cat = to_categorical(y_dataset_test)
 
 
     print('-----------------------------')
     print('-----------------------------')
     print("-- Fitting model with custom class_weight", class_weight)
     print("-- Fitting model with custom class_weight", class_weight)
     print('-----------------------------')
     print('-----------------------------')
-    model.fit(X_train, y_train, 
-        validation_data=(X_val, y_val), 
+    model.fit(X_train, y_train_cat, 
+        validation_data=(X_val, y_val_cat), 
         initial_epoch=initial_epoch, 
         initial_epoch=initial_epoch, 
         epochs=p_epochs, 
         epochs=p_epochs, 
         batch_size=p_batch_size, 
         batch_size=p_batch_size, 
         callbacks=callbacks_list, 
         callbacks=callbacks_list, 
         class_weight=class_weight)
         class_weight=class_weight)
 
 
-    score = model.evaluate(X_val, y_val, batch_size=p_batch_size)
+    score = model.evaluate(X_val, y_val_cat, batch_size=p_batch_size)
 
 
     print("Accuracy score on val dataset ", score)
     print("Accuracy score on val dataset ", score)
 
 
@@ -280,21 +264,23 @@ def main():
     model_output_path = os.path.join(cfg.output_models, p_output + '.h5')
     model_output_path = os.path.join(cfg.output_models, p_output + '.h5')
     model.save(model_output_path)
     model.save(model_output_path)
 
 
+    print('Begin of prediction score on the whole dataset:')
     # Get results obtained from model
     # Get results obtained from model
-    y_train_prediction = model.predict(X_train)
-    y_val_prediction = model.predict(X_val)
-    y_test_prediction = model.predict(x_dataset_test)
+    y_train_prediction = model.predict(X_train, verbose=1)
+    y_val_prediction = model.predict(X_val, verbose=1)
+    y_test_prediction = model.predict(x_data_test, verbose=1)
 
 
     y_train_prediction = np.argmax(y_train_prediction, axis=1)
     y_train_prediction = np.argmax(y_train_prediction, axis=1)
     y_val_prediction = np.argmax(y_val_prediction, axis=1)
     y_val_prediction = np.argmax(y_val_prediction, axis=1)
+    y_test_prediction = np.argmax(y_test_prediction, axis=1)
 
 
     acc_train_score = accuracy_score(y_train, y_train_prediction)
     acc_train_score = accuracy_score(y_train, y_train_prediction)
     acc_val_score = accuracy_score(y_val, y_val_prediction)
     acc_val_score = accuracy_score(y_val, y_val_prediction)
-    acc_test_score = accuracy_score(y_test, y_test_prediction)
+    acc_test_score = accuracy_score(y_dataset_test, y_test_prediction)
 
 
     roc_train_score = roc_auc_score(y_train, y_train_prediction)
     roc_train_score = roc_auc_score(y_train, y_train_prediction)
     roc_val_score = roc_auc_score(y_val, y_val_prediction)
     roc_val_score = roc_auc_score(y_val, y_val_prediction)
-    roc_test_score = roc_auc_score(y_test, y_val_prediction)
+    roc_test_score = roc_auc_score(y_dataset_test, y_test_prediction)
 
 
     # save model performance
     # save model performance
     if not os.path.exists(cfg.output_results_folder):
     if not os.path.exists(cfg.output_results_folder):