
First initialization of the project

Jérôme BUISINE 7 months ago
commit 021a6c2dc3
83 changed files with 5580 additions and 0 deletions
  1. .gitignore (+34 -0)
  2. .gitmodules (+6 -0)
  3. LICENSE (+9 -0)
  4. README.md (+151 -0)
  5. custom_config.py (+23 -0)
  6. data_attributes.py (+127 -0)
  7. data_processing/generateAndTrain_maxwell_custom.sh (+57 -0)
  8. data_processing/generateAndTrain_maxwell_custom_optimization.sh (+68 -0)
  9. display/display_reconstructed_image_from_humans.py (+147 -0)
  10. display/display_reconstructed_image_from_simulation.py (+184 -0)
  11. display/display_simulation_curves.py (+128 -0)
  12. generate/generate_all_data.py (+189 -0)
  13. generate/generate_data_model.py (+272 -0)
  14. generate/generate_data_model_random_all.py (+299 -0)
  15. generate/generate_data_model_random_center.py (+310 -0)
  16. generate/generate_data_model_random_split.py (+309 -0)
  17. models.py (+95 -0)
  18. modules/.gitignore (+5 -0)
  19. modules/LICENSE (+8 -0)
  20. modules/README.md (+42 -0)
  21. modules/__init__.py (+0 -0)
  22. modules/classes/Transformation.py (+90 -0)
  23. modules/classes/__init__.py (+0 -0)
  24. modules/config/__init__.py (+0 -0)
  25. modules/config/attributes_config.py (+19 -0)
  26. modules/config/cnn_config.py (+21 -0)
  27. modules/config/global_config.py (+38 -0)
  28. modules/oar/README.md (+34 -0)
  29. modules/oar/generate_symlinks.sh (+39 -0)
  30. modules/oar/oar.example.sh (+15 -0)
  31. modules/requirements.txt (+5 -0)
  32. modules/utils/__init__.py (+0 -0)
  33. modules/utils/data.py (+82 -0)
  34. optimization/.gitignore (+60 -0)
  35. optimization/LICENSE (+8 -0)
  36. optimization/README.md (+29 -0)
  37. optimization/__init__.py (+0 -0)
  38. optimization/algorithms/Algorithm.py (+170 -0)
  39. optimization/algorithms/IteratedLocalSearch.py (+45 -0)
  40. optimization/algorithms/LocalSearch.py (+41 -0)
  41. optimization/algorithms/__init__.py (+0 -0)
  42. optimization/checkpoints/BasicCheckpoint.py (+75 -0)
  43. optimization/checkpoints/Checkpoint.py (+22 -0)
  44. optimization/checkpoints/__init__.py (+0 -0)
  45. optimization/evaluators/EvaluatorExample.py (+8 -0)
  46. optimization/evaluators/__init__.py (+0 -0)
  47. optimization/mainExample.py (+48 -0)
  48. optimization/operators/Operator.py (+7 -0)
  49. optimization/operators/__init__.py (+0 -0)
  50. optimization/operators/crossovers/Crossover.py (+11 -0)
  51. optimization/operators/crossovers/SimpleCrossover.py (+31 -0)
  52. optimization/operators/crossovers/__init__.py (+0 -0)
  53. optimization/operators/mutators/Mutation.py (+11 -0)
  54. optimization/operators/mutators/SimpleBinaryMutation.py (+28 -0)
  55. optimization/operators/mutators/SimpleMutation.py (+35 -0)
  56. optimization/operators/mutators/__init__.py (+0 -0)
  57. optimization/operators/policies/Policy.py (+40 -0)
  58. optimization/operators/policies/RandomPolicy.py (+16 -0)
  59. optimization/operators/policies/__init__.py (+0 -0)
  60. optimization/requirements.txt (+1 -0)
  61. optimization/solutions/BinarySolution.py (+40 -0)
  62. optimization/solutions/CombinatoryIntegerSolution.py (+40 -0)
  63. optimization/solutions/IntegerSolution.py (+40 -0)
  64. optimization/solutions/Solution.py (+45 -0)
  65. optimization/solutions/__init__.py (+0 -0)
  66. others/save_model_result_in_md.py (+93 -0)
  67. others/save_model_result_in_md_maxwell.py (+324 -0)
  68. others/testModelByScene.sh (+62 -0)
  69. others/testModelByScene_maxwell.sh (+70 -0)
  70. prediction/predict_noisy_image_rfe.py (+107 -0)
  71. prediction/predict_seuil_expe.py (+214 -0)
  72. prediction/predict_seuil_expe_curve_scene.py (+163 -0)
  73. prediction/predict_seuil_expe_maxwell.py (+216 -0)
  74. prediction/predict_seuil_expe_maxwell_curve.py (+174 -0)
  75. prediction/prediction_scene.py (+125 -0)
  76. requirements.txt (+12 -0)
  77. run/runAll_maxwell_custom.sh (+35 -0)
  78. run/runAll_maxwell_custom_optimization_attributes.sh (+37 -0)
  79. run/runAll_maxwell_custom_optimization_filters.sh (+38 -0)
  80. simulation/generate_all_simulate_curves.sh (+6 -0)
  81. simulation/run_maxwell_simulation_filters_statistics.sh (+39 -0)
  82. simulation/run_maxwell_simulation_filters_statistics_opti.sh (+56 -0)
  83. train_model.py (+152 -0)

+ 34 - 0
.gitignore

@@ -0,0 +1,34 @@
+# project data
+data
+saved_models
+threshold_map
+models_info
+custom_norm
+learned_zones
+corr_indices
+metric_curves
+results
+.ipynb_checkpoints
+
+# simulate_models.csv
+
+dataset
+
+.python-version
+__pycache__
+
+# by default avoid model files and png files
+*.png
+!saved_models/*.png
+.vscode
+
+# simulate models .csv file
+simulate_models*.csv
+
+# log file if used
+logs
+backups
+*.log
+
+oar.sh
+params.txt

+ 6 - 0
.gitmodules

@@ -0,0 +1,6 @@
+[submodule "modules"]
+	path = modules
+	url = https://github.com/prise-3d/Thesis-CommonModules.git
+[submodule "optimization"]
+	path = optimization
+	url = https://github.com/prise-3d/Thesis-OptimizationModules.git

+ 9 - 0
LICENSE

@@ -0,0 +1,9 @@
+MIT License
+Copyright (c) 2019 prise-3d
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+

+ 151 - 0
README.md

@@ -0,0 +1,151 @@
+# Noise detection using recursive feature elimination of attributes
+
+## Description
+
+Noise detection on computer-generated (synthesized) images, using 26 attributes obtained from a few filters.
+
+Filters list:
+- average
+- wiener
+- median
+- gaussian
+- wavelet
+
+and a complexity value computed from the image using `std(Sobel(X))` (see the sketch below).
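+
+As a rough illustration (not part of this repository), such a complexity value can be computed with OpenCV and NumPy; the sketch below is only one possible reading of `std(Sobel(X))`:
+
+```python
+import cv2
+import numpy as np
+
+def complexity(gray_block):
+    # horizontal and vertical gradients of the grayscale block
+    sobel_x = cv2.Sobel(gray_block, cv2.CV_64F, 1, 0, ksize=3)
+    sobel_y = cv2.Sobel(gray_block, cv2.CV_64F, 0, 1, ksize=3)
+    # dispersion of the gradient magnitude
+    return np.std(np.hypot(sobel_x, sobel_y))
+```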
+
+## Requirements
+
+```
+pip install -r requirements.txt
+```
+
+Generate all the data needed for each feature (this requires the whole dataset; contact us to obtain it):
+
+```bash
+python generate/generate_all_data.py --feature all
+```
+
+
+## Project structure
+
+### Link to your dataset
+
+You have to create a symbolic link to your own dataset, which must respect this structure:
+
+- dataset/
+  - Scene1/
+    - zone00/
+    - ...
+    - zone15/
+      - seuilExpe (file which contains the threshold of this zone as perceived by human observers)
+    - Scene1_00050.png
+    - Scene1_00070.png
+    - ...
+    - Scene1_01180.png
+    - Scene1_01200.png
+  - Scene2/
+    - ...
+  - ...
+
+Create your symbolic link:
+
+```
+ln -s /path/to/your/data dataset
+```
+
+### Code architecture description
+
+- **modules/\***: contains all modules useful for the whole project (such as configuration variables)
+- **generate/\***: contains Python scripts to generate data from scenes (described later)
+- **data_processing/\***: all Python scripts used to generate custom datasets for models
+- **prediction/\***: all Python scripts used to predict new thresholds from computed models
+- **simulation/\***: contains all bash scripts used to run simulations from models
+- **display/\***: contains all Python scripts used to display scene information (such as singular values...)
+- **run/\***: bash scripts to run several steps at once:
+  - generate custom dataset
+  - train model
+  - keep model performance
+  - run simulation (if necessary)
+- **others/\***: folder which contains other scripts, such as getting the performance of a model on a specific scene and writing it into a Markdown file.
+- **data_attributes.py**: file which contains the implementation of all features extracted from an image.
+- **custom_config.py**: overrides the main project configuration of `modules/config/global_config.py`
+- **train_model.py**: script used to train a specific available model.
+
+### Generated data directories:
+
+- **data/\***: folder which will contain all generated *.train* & *.test* files used to train models.
+- **saved_models/\***: all saved scikit-learn or Keras models.
+- **models_info/\***: all Markdown files generated to get quick information about model performance and predictions, obtained after running a `run/runAll_*.sh` script.
+- **results/**: this folder contains the `model_comparisons.csv` file used to store model performances.
+
+
+## How to use?
+
+**Remark**: all Python scripts provide a *--help* option.
+
+```
+python generate/generate_data_model.py --help
+```
+
+Parameters explained (an example invocation is given after this list):
+- **feature**: the feature to use
+- **output**: filename of the output data (which will be split into two parts, *.train* and *.test*, according to your choices). It must be placed in the `data` folder.
+- **interval**: the interval of data you want to use from the SVD vector.
+- **kind**: kind of data ['svd', 'svdn', 'svdne']: not normalized, normalized per vector, or normalized using the whole dataset.
+- **scenes**: scenes chosen for the training dataset.
+- **zones**: zones to take for the training dataset.
+- **step**: step used to select which pictures are kept.
+- **percent**: percentage of each zone's data to take (chosen randomly).
+- **custom**: specify if you want your data normalized using the interval only and not the whole singular values vector. If so, the value of this parameter is the output filename which will store the min and max values found. This file will be useful later to make predictions with the model (optional parameter).
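+
+For instance, a call could look like the following (all values are illustrative placeholders, using the same `xxxx` convention as the rest of this README):
+
+```bash
+python generate/generate_data_model.py --output data/xxxx --interval "0,26" --kind svdn --feature filters_statistics --scenes "A, D, G, H" --zones "0,1,2,3" --percent 1 --step 10 --custom xxxx_min_max
+```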
+
+### Train model
+
+This is an example of how to train a model:
+
+```bash
+python train_model.py --data 'data/xxxx' --output 'model_file_to_save' --choice 'model_choice'
+```
+
+### Predict image using model
+
+Now that we have a trained model, we can use it with an image as input:
+
+```bash
+python prediction/predict_noisy_image_rfe.py --image path/to/image.png --interval "x,x" --model saved_models/xxxxxx.joblib --feature 'filter_statistics' --mode 'svdn' --custom 'min_max_filename'
+```
+
+- **feature**: the feature choice needs to be one of those listed above.
+- **custom**: specify the filename with the custom min and max values of your data interval. This file was generated using the **custom** parameter of one of the **generate_data_model\*.py** scripts (optional parameter).
+
+The model will return only 0 or 1:
+- 1 means a noisy image is detected.
+- 0 means the image does not seem to be noisy.
+
+Every new SVD feature needs (a minimal sketch is given after this list):
+- its name added into the *features_choices_labels* global array variable of the `custom_config.py` file.
+- a specification of how the feature is computed in the *get_image_features* method of the `data_attributes.py` file.
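+
+As a minimal sketch (the feature name `my_feature` and its computation are purely hypothetical; only the two insertion points are real):
+
+```python
+# custom_config.py: register the new feature label
+features_choices_labels = ['filters_statistics', 'svd', 'filters_statistics_sobel', 'svd_sobel', 'my_feature']
+
+# data_attributes.py: add a branch inside get_image_features(data_type, block)
+if 'my_feature' in data_type:
+    lab_img = transform.get_LAB_L(block)
+    # e.g. use the singular values of the L channel as the feature vector
+    data = lin_svd(np.array(lab_img), compute_uv=False)
+```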
+
+### Predict scene using model
+
+Now that we have a trained model, we can use it on a whole scene:
+
+```bash
+python prediction_scene.py --data path/to/xxxx.csv --model saved_model/xxxx.joblib --output xxxxx --scene xxxx
+```
+**Remark**: the *scene* parameter must be the exact name of the scene.
+
+### Visualize data
+
+All scripts with names **display/display_\*.py** are used to display data information or results.
+
+Just use the *--help* option to get more information.
+
+### Simulate model on scene
+
+All scripts named **prediction/predict_seuil_expe\*.py** are used to simulate model predictions during the rendering process. Do not forget the **custom** parameter filename if necessary.
+
+Once the simulation is done, check your **threshold_map/%MODEL_NAME%/simulation\_curves\_zones\_\*/** folder and use it with the help of the **display_simulation_curves.py** script.
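+
+For example (paths are placeholders following the same convention as above):
+
+```bash
+python display/display_simulation_curves.py --folder "threshold_map/%MODEL_NAME%/simulation_curves_zones_xxxx" --model %MODEL_NAME%
+```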
+
+## License
+
+[The MIT license](LICENSE)

+ 23 - 0
custom_config.py

@@ -0,0 +1,23 @@
+from modules.config.attributes_config import *
+
+# store all variables from global config
+context_vars = vars()
+
+# folders
+logs_folder                             = 'logs'
+backup_folder                           = 'backups'
+
+## min_max_custom_folder           = 'custom_norm'
+## correlation_indices_folder      = 'corr_indices'
+
+# variables
+features_choices_labels                 = ['filters_statistics', 'svd', 'filters_statistics_sobel', 'svd_sobel']
+optimization_filters_result_filename    = 'optimization_comparisons_filters.csv'
+optimization_attributes_result_filename = 'optimization_comparisons_attributes.csv'
+
+models_names_list                       = ["rfe_svm_model"]
+
+## models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
+## normalization_choices           = ['svd', 'svdn', 'svdne']
+
+# parameters

+ 127 - 0
data_attributes.py

@@ -0,0 +1,127 @@
+# main imports
+import numpy as np
+import sys
+
+# image transform imports
+from PIL import Image
+from skimage import color
+from sklearn.decomposition import FastICA
+from sklearn.decomposition import IncrementalPCA
+from sklearn.decomposition import TruncatedSVD
+from numpy.linalg import svd as lin_svd
+from scipy.signal import medfilt2d, wiener, cwt
+import pywt
+import cv2
+
+from ipfml.processing import transform, compression, segmentation
+from ipfml import utils
+
+# modules and config imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+
+
+def get_image_features(data_type, block):
+    """
+    Compute the feature vector corresponding to `data_type` for the given image block
+    """
+
+    if 'filters_statistics' in data_type:
+
+        img_width, img_height = 200, 200
+
+        lab_img = transform.get_LAB_L(block)
+        arr = np.array(lab_img)
+
+        # compute all filters statistics
+        def get_stats(arr, I_filter):
+
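+            # e1/L: absolute error between the block and its filtered version
+            # mu0: mean of this error; P: root mean square deviation of the error around mu0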
+            e1       = np.abs(arr - I_filter)
+            L        = np.array(e1)
+            mu0      = np.mean(L)
+            A        = L - mu0
+            H        = A * A
+            E        = np.sum(H) / (img_width * img_height)
+            P        = np.sqrt(E)
+
+            return mu0, P
+
+        stats = []
+
+        kernel = np.ones((3,3),np.float32)/9
+        stats.append(get_stats(arr, cv2.filter2D(arr,-1,kernel)))
+
+        kernel = np.ones((5,5),np.float32)/25
+        stats.append(get_stats(arr, cv2.filter2D(arr,-1,kernel)))
+
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (3, 3), 0.5)))
+
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (3, 3), 1)))
+
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (3, 3), 1.5)))
+
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (5, 5), 0.5)))
+
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (5, 5), 1)))
+
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (5, 5), 1.5)))
+
+        stats.append(get_stats(arr, medfilt2d(arr, [3, 3])))
+
+        stats.append(get_stats(arr, medfilt2d(arr, [5, 5])))
+
+        stats.append(get_stats(arr, wiener(arr, [3, 3])))
+
+        stats.append(get_stats(arr, wiener(arr, [5, 5])))
+
+        wave = w2d(arr, 'db1', 2)
+        stats.append(get_stats(arr, np.array(wave, 'float64')))
+
+        data = []
+
+        for stat in stats:
+            data.append(stat[0])
+
+        for stat in stats:
+            data.append(stat[1])
+        
+        data = np.array(data)
+
+    return data
+
+
+def w2d(arr, mode='haar', level=1):
+    # convert to float in [0, 1] (np.divide returns a new array, so keep its result)
+    imArray = np.divide(arr, 255)
+
+    # compute coefficients 
+    coeffs=pywt.wavedec2(imArray, mode, level=level)
+
+    #Process Coefficients
+    coeffs_H=list(coeffs)  
+    coeffs_H[0] *= 0
+
+    # reconstruction
+    imArray_H = pywt.waverec2(coeffs_H, mode)
+    imArray_H *= 255
+    imArray_H = np.uint8(imArray_H)
+
+    return imArray_H
+
+
+def _get_mscn_variance(block, sub_block_size=(50, 50)):
+
+    blocks = segmentation.divide_in_blocks(block, sub_block_size)
+
+    data = []
+
+    for block in blocks:
+        mscn_coefficients = transform.get_mscn_coefficients(block)
+        flat_coeff = mscn_coefficients.flatten()
+        data.append(np.var(flat_coeff))
+
+    return np.sort(data)
+

+ 57 - 0
data_processing/generateAndTrain_maxwell_custom.sh

@@ -0,0 +1,57 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of feature information"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No argument supplied"
+    echo "Need of kind of data to use"
+    exit 1
+fi
+
+size=$1
+feature=$2
+data=$3
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+start=0
+end=$size
+model="rfe_svm_model"
+# results file checked before re-computing a model (see the results/ folder in the README)
+result_filename="results/model_comparisons.csv"
+
+for nb_zones in {10,11,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+
+        FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+        MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+        CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
+
+        echo $FILENAME
+
+        # only compute if necessary (skip models whose results already exist, in case the server crashes)
+        if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+            echo "${MODEL_NAME} results already generated..."
+        else
+            python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+            #python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+            #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+            #python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
+        fi
+    done
+done

+ 68 - 0
data_processing/generateAndTrain_maxwell_custom_optimization.sh

@@ -0,0 +1,68 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of feature information"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No argument supplied"
+    echo "Need of kind of data to use"
+    exit 1
+fi
+
+if [ -z "$4" ]
+  then
+    echo "No argument supplied"
+    echo "Use of filters or attributes"
+    exit 1
+fi
+
+
+size=$1
+feature=$2
+data=$3
+filter=$4
+
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+result_filename="results/optimization_comparisons_${filter}.csv"
+start=0
+end=$size
+
+#for nb_zones in {4,6,8,10,12}; do
+for nb_zones in {10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+      
+            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}"
+            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}"
+            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}_min_max"
+
+            echo $FILENAME
+
+            # only compute if necessary (skip models whose results already exist, in case the server crashes)
+            if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                
+                echo "Train ${MODEL_NAME}"
+                #python find_best_${filter}.py --data ${FILENAME} --choice ${model} &
+            fi
+        done
+    done
+done

+ 147 - 0
display/display_reconstructed_image_from_humans.py

@@ -0,0 +1,147 @@
+# main imports
+import numpy as np
+import pandas as pd
+import math
+import time
+
+import os, sys, argparse
+
+# image processing imports
+import matplotlib.pyplot as plt
+from PIL import Image
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from data_attributes import get_image_features
+from modules.utils import data as dt
+
+# other variables
+learned_zones_folder = cfg.learned_zones_folder
+models_name          = cfg.models_names_list
+
+# utils information
+zone_width, zone_height = (200, 200)
+scene_width, scene_height = (800, 800)
+nb_x_parts = math.floor(scene_width / zone_width)
+
+
+def reconstruct_image(scene_name, output):
+    """
+    @brief Reconstruct a scene image using the human threshold of each zone
+    @param scene_name, scene name used
+    @param output, the output filename
+    @return nothing
+    """
+
+    # compute zone start index
+    zones_coordinates = []
+    for zone_index in cfg.zones_indices:
+        x_zone = (zone_index % nb_x_parts) * zone_width
+        y_zone = (math.floor(zone_index / nb_x_parts)) * zone_height
+
+        zones_coordinates.append((x_zone, y_zone))
+
+    scene_folder = os.path.join(cfg.dataset_path, scene_name)
+
+    folder_scene_elements = os.listdir(scene_folder)
+
+    zones_folder = [zone for zone in folder_scene_elements if 'zone' in zone]
+    zones_folder = sorted(zones_folder)
+
+    scenes_images = [img for img in folder_scene_elements if cfg.scene_image_extension in img]
+    scenes_images = sorted(scenes_images)
+
+    # 1. find thresholds from scene
+    human_thresholds = []
+
+    for zone_folder in zones_folder:
+        zone_path = os.path.join(scene_folder, zone_folder)
+        
+        with open(os.path.join(zone_path, cfg.seuil_expe_filename)) as f:
+            human_thresholds.append(int(f.readline()))
+
+    # 2. for each zone, find the first image whose quality index exceeds the human threshold
+    zone_images_index = []
+
+    for threshold in human_thresholds:
+
+        current_image_index = 0
+
+        for image_name in scenes_images:
+
+            image_quality = dt.get_scene_image_quality(image_name)
+
+            if image_quality > threshold:
+                current_image_index = image_quality
+                break
+
+
+        str_index = str(current_image_index)
+        while len(str_index) < 5:
+            str_index = "0" + str_index
+
+        zone_images_index.append(str_index)
+
+    images_zones = []
+    line_images_zones = []
+    # get image using threshold by zone
+    for id, zone_index in enumerate(zone_images_index):
+        filtered_images = [img for img in scenes_images if zone_index in img]
+        
+        if len(filtered_images) > 0:
+            image_name = filtered_images[0]
+        else:
+            image_name = scenes_images[-1]
+        
+        image_path = os.path.join(scene_folder, image_name)
+        selected_image = Image.open(image_path)
+
+        x_zone, y_zone = zones_coordinates[id]
+        zone_image = np.array(selected_image)[y_zone:y_zone+zone_height, x_zone:x_zone+zone_width]
+        line_images_zones.append(zone_image)
+
+        if int(id + 1) % int(scene_width / zone_width) == 0:
+            images_zones.append(np.concatenate(line_images_zones, axis=1))
+            line_images_zones = []
+
+
+    # 3. reconstructed the image using these zones
+    reconstructed_image = np.concatenate(images_zones, axis=0)
+
+    # 4. Save the image with generated name based on scene
+    reconstructed_pil_img = Image.fromarray(reconstructed_image)
+
+    folders = output.split('/')
+    if len(folders) > 1:
+        output_folder = '/'.join(folders[:len(folders) - 1])
+        
+        if not os.path.exists(output_folder):
+            os.makedirs(output_folder)
+
+    reconstructed_pil_img.save(output)
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Compute and save reconstructed images from human thresholds")
+
+    parser.add_argument('--scene', type=str, help='Scene index to use', choices=cfg.scenes_indices)
+    parser.add_argument('--output', type=str, help='Output reconstructed image path and filename')
+
+    args = parser.parse_args()
+
+    p_scene = args.scene
+    p_output = args.output
+    
+    scenes_list = cfg.scenes_names
+    scenes_indices = cfg.scenes_indices
+
+    scene_index = scenes_indices.index(p_scene.strip())
+    scene_name = scenes_list[scene_index]
+
+    reconstruct_image(scene_name, p_output)
+
+if __name__== "__main__":
+    main()

+ 184 - 0
display/display_reconstructed_image_from_simulation.py

@@ -0,0 +1,184 @@
+# main imports
+import numpy as np
+import pandas as pd
+import math
+import time
+
+import os, sys, argparse
+
+# image processing imports
+import matplotlib.pyplot as plt
+from PIL import Image
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from data_attributes import get_image_features
+
+# other variables
+learned_zones_folder = cfg.learned_zones_folder
+models_name          = cfg.models_names_list
+
+# utils information
+zone_width, zone_height = (200, 200)
+scene_width, scene_height = (800, 800)
+nb_x_parts = math.floor(scene_width / zone_width)
+
+
+def reconstruct_image(folder_path, model_name, p_limit):
+    """
+    @brief Reconstruct scene images from the .csv files obtained during simulation
+    @param folder_path, folder which contains all .csv files obtained during simulation
+    @param model_name, current name of model
+    @param p_limit, number of consecutive 'not noisy' predictions used as stopping criterion
+    @return nothing
+    """
+
+    for name in models_name:
+        if name in model_name:
+            data_filename = model_name
+            learned_zones_folder_path = os.path.join(learned_zones_folder, data_filename)
+
+    data_files = [x for x in os.listdir(folder_path) if '.png' not in x]
+
+    scene_names = [f.split('_')[3] for f in data_files]
+
+    # compute zone start index
+    zones_coordinates = []
+    for index, zone_index in enumerate(cfg.zones_indices):
+        x_zone = (zone_index % nb_x_parts) * zone_width
+        y_zone = (math.floor(zone_index / nb_x_parts)) * zone_height
+
+        zones_coordinates.append((x_zone, y_zone))
+
+    print(zones_coordinates)
+
+    for id, f in enumerate(data_files):
+
+        scene_name = scene_names[id]
+        path_file = os.path.join(folder_path, f)
+
+        # TODO : check if necessary to keep information about zone learned when displaying data
+        scenes_zones_used_file_path = os.path.join(learned_zones_folder_path, scene_name + '.csv')
+
+        zones_used = []
+
+        if os.path.exists(scenes_zones_used_file_path):
+            with open(scenes_zones_used_file_path, 'r') as f:
+                zones_used = [int(x) for x in f.readline().split(';') if x != '']
+
+        # 1. find estimated threshold for each zone scene using `data_files` and p_limit
+        model_thresholds = []
+        df = pd.read_csv(path_file, header=None, sep=";")
+
+        for index, row in df.iterrows():
+
+            row = np.asarray(row)
+
+            #threshold = row[2]
+            start_index = row[3]
+            step_value = row[4]
+            rendering_predictions = row[5:]
+
+            nb_generated_image = 0
+            nb_not_noisy_prediction = 0
+
+            for prediction in rendering_predictions:
+                
+                if int(prediction) == 0:
+                    nb_not_noisy_prediction += 1
+                else:
+                    nb_not_noisy_prediction = 0
+
+                # exit loop if limit is targeted
+                if nb_not_noisy_prediction >= p_limit:
+                    break
+
+                nb_generated_image += 1
+            
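+            # estimated threshold: starting sample index plus the number of images rendered
+            # before the model answered "not noisy" p_limit times in a row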
+            current_threshold = start_index + step_value * nb_generated_image
+            model_thresholds.append(current_threshold)
+
+        # 2. find images for each zone which are attached to this estimated threshold by the model
+
+        zone_images_index = []
+
+        for est_threshold in model_thresholds:
+
+            str_index = str(est_threshold)
+            while len(str_index) < 5:
+                str_index = "0" + str_index
+
+            zone_images_index.append(str_index)
+
+        scene_folder = os.path.join(cfg.dataset_path, scene_name)
+        
+        scenes_images = [img for img in os.listdir(scene_folder) if cfg.scene_image_extension in img]
+        scenes_images = sorted(scenes_images)
+
+        images_zones = []
+        line_images_zones = []
+        # get image using threshold by zone
+        for id, zone_index in enumerate(zone_images_index):
+            filtered_images = [img for img in scenes_images if zone_index in img]
+            
+            if len(filtered_images) > 0:
+                image_name = filtered_images[0]
+            else:
+                image_name = scenes_images[-1]
+            
+            #print(image_name)
+            image_path = os.path.join(scene_folder, image_name)
+            selected_image = Image.open(image_path)
+
+            x_zone, y_zone = zones_coordinates[id]
+            zone_image = np.array(selected_image)[y_zone:y_zone+zone_height, x_zone:x_zone+zone_width]
+            line_images_zones.append(zone_image)
+
+            if int(id + 1) % int(scene_width / zone_width) == 0:
+                images_zones.append(np.concatenate(line_images_zones, axis=1))
+                print(len(line_images_zones))
+                line_images_zones = []
+
+
+        # 3. reconstructed the image using these zones
+        reconstructed_image = np.concatenate(images_zones, axis=0)
+
+        # 4. Save the image with generated name based on scene, model and `p_limit`
+        reconstructed_pil_img = Image.fromarray(reconstructed_image)
+
+        output_path = os.path.join(folder_path, scene_name + '_reconstruction_limit_' + str(p_limit) + '.png')
+
+        reconstructed_pil_img.save(output_path)
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Display simulations curves from simulation data")
+
+    parser.add_argument('--folder', type=str, help='Folder which contains simulations data for scenes')
+    parser.add_argument('--model', type=str, help='Name of the model used for simulations')
+    parser.add_argument('--limit', type=int, help='Detection limit to target to stop rendering (number of times model tells image has not more noise)')
+
+    args = parser.parse_args()
+
+    p_folder = args.folder
+    p_limit  = args.limit
+
+    if args.model:
+        p_model = args.model
+    else:
+        # find p_model from folder if model arg not given (folder path need to have model name)
+        if p_folder.split('/')[-1]:
+            p_model = p_folder.split('/')[-1]
+        else:
+            p_model = p_folder.split('/')[-2]
+    
+    print(p_model)
+
+    reconstruct_image(p_folder, p_model, p_limit)
+
+    print(p_folder)
+
+if __name__== "__main__":
+    main()

+ 128 - 0
display/display_simulation_curves.py

@@ -0,0 +1,128 @@
+# main imports
+import numpy as np
+import pandas as pd
+
+import os, sys, argparse
+
+# image processing imports
+import matplotlib.pyplot as plt
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from data_attributes import get_image_features
+
+# other variables
+learned_zones_folder = cfg.learned_zones_folder
+models_name          = cfg.models_names_list
+label_freq           = 6
+
+def display_curves(folder_path, model_name):
+    """
+    @brief Method used to display simulation given .csv files
+    @param folder_path, folder which contains all .csv files obtained during simulation
+    @param model_name, current name of model
+    @return nothing
+    """
+
+    for name in models_name:
+        if name in model_name:
+            data_filename = model_name
+            learned_zones_folder_path = os.path.join(learned_zones_folder, data_filename)
+
+    data_files = [x for x in os.listdir(folder_path) if '.png' not in x]
+
+    scene_names = [f.split('_')[3] for f in data_files]
+
+    for id, f in enumerate(data_files):
+
+        print(scene_names[id])
+        path_file = os.path.join(folder_path, f)
+
+        scenes_zones_used_file_path = os.path.join(learned_zones_folder_path, scene_names[id] + '.csv')
+
+        # by default zone used is empty
+        zones_used = []
+
+        if os.path.exists(scenes_zones_used_file_path):
+            with open(scenes_zones_used_file_path, 'r') as f:
+                zones_used = [int(x) for x in f.readline().split(';') if x != '']
+
+        print(zones_used)
+
+        df = pd.read_csv(path_file, header=None, sep=";")
+
+        fig=plt.figure(figsize=(35, 22))
+        fig.suptitle("Detection simulation for " + scene_names[id] + " scene", fontsize=20)
+
+        for index, row in df.iterrows():
+
+            row = np.asarray(row)
+
+            threshold = row[2]
+            start_index = row[3]
+            step_value = row[4]
+
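+            # locate the x position (in plotted samples) corresponding to the human threshold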
+            counter_index = 0
+
+            current_value = start_index
+
+            while(current_value < threshold):
+                counter_index += 1
+                current_value += step_value
+
+            fig.add_subplot(4, 4, (index + 1))
+            plt.plot(row[5:])
+
+            if index in zones_used:
+                ax = plt.gca()
+                ax.set_facecolor((0.9, 0.95, 0.95))
+
+            # draw a vertical red line at the human threshold position
+            plt.plot([counter_index, counter_index], [-2, 2], 'k-', lw=2, color='red')
+
+            if index % 4 == 0:
+                plt.ylabel('Not noisy / Noisy', fontsize=20)
+
+            if index >= 12:
+                plt.xlabel('Samples per pixel', fontsize=20)
+
+            x_labels = [id * step_value + start_index for id, val in enumerate(row[5:]) if id % label_freq == 0]
+
+            x = [v for v in np.arange(0, len(row[5:])+1) if v % label_freq == 0]
+
+            plt.xticks(x, x_labels, rotation=45)
+            plt.ylim(-1, 2)
+
+        plt.savefig(os.path.join(folder_path, scene_names[id] + '_simulation_curve.png'))
+        #plt.show()
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Display simulations curves from simulation data")
+
+    parser.add_argument('--folder', type=str, help='Folder which contains simulations data for scenes')
+    parser.add_argument('--model', type=str, help='Name of the model used for simulations')
+
+    args = parser.parse_args()
+
+    p_folder = args.folder
+
+    if args.model:
+        p_model = args.model
+    else:
+        # find p_model from folder if model arg not given (folder path need to have model name)
+        if p_folder.split('/')[-1]:
+            p_model = p_folder.split('/')[-1]
+        else:
+            p_model = p_folder.split('/')[-2]
+    
+    print(p_model)
+
+    display_curves(p_folder, p_model)
+
+    print(p_folder)
+
+if __name__== "__main__":
+    main()

+ 189 - 0
generate/generate_all_data.py

@@ -0,0 +1,189 @@
+# main imports
+import sys, os, argparse
+import numpy as np
+import random
+import time
+import json
+
+# image processing imports
+from PIL import Image
+
+from ipfml.processing import transform, segmentation
+from ipfml import utils
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+from data_attributes import get_image_features
+
+
+# getting configuration information
+zone_folder             = cfg.zone_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list             = cfg.scenes_names
+scenes_indexes          = cfg.scenes_indices
+choices                 = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+features_choices        = cfg.features_choices_labels
+output_data_folder      = cfg.output_data_folder
+
+generic_output_file_svd = '_random.csv'
+
+def generate_data_svd(data_type, mode):
+    """
+    @brief Method which generates all .csv files from scenes
+    @param data_type,  feature choice
+    @param mode, normalization choice
+    @return nothing
+    """
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    # keep in memory min and max data found from data_type
+    min_val_found = sys.maxsize
+    max_val_found = 0
+
+    data_min_max_filename = os.path.join(path, data_type + min_max_filename)
+
+    # go ahead each scenes
+    for folder_scene in scenes:
+
+        print(folder_scene)
+        scene_path = os.path.join(path, folder_scene)
+
+        # getting output filename
+        output_svd_filename = data_type + "_" + mode + generic_output_file_svd
+
+        # construct each zones folder name
+        zones_folder = []
+        svd_output_files = []
+
+        # get zones list info
+        for index in zones:
+            index_str = str(index)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+
+            current_zone = "zone"+index_str
+            zones_folder.append(current_zone)
+
+            zone_path = os.path.join(scene_path, current_zone)
+            svd_file_path = os.path.join(zone_path, output_svd_filename)
+
+            # add writer into list
+            svd_output_files.append(open(svd_file_path, 'w'))
+
+        # get all images of folder
+        scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+        number_scene_image = len(scene_images)
+            
+        for id_img, img_path in enumerate(scene_images):
+            
+            current_image_postfix = dt.get_scene_image_postfix(img_path)
+
+            current_img = Image.open(img_path)
+            img_blocks = segmentation.divide_in_blocks(current_img, (200, 200))
+
+            for id_block, block in enumerate(img_blocks):
+
+                ############################
+                # feature computation part #
+                ############################
+
+                data = get_image_features(data_type, block)
+
+                ##################
+                # Data mode part #
+                ##################
+
+                # modify data depending mode
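+                # svdne: normalize with the min/max found over the whole dataset
+                # svdn : normalize the current vector alone
+                # svd  : keep raw values (and track the global min/max for later use)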
+                if mode == 'svdne':
+
+                    # getting max and min information from min_max_filename
+                    with open(data_min_max_filename, 'r') as f:
+                        min_val = float(f.readline())
+                        max_val = float(f.readline())
+
+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
+
+                if mode == 'svdn':
+                    data = utils.normalize_arr(data)
+
+                # save min and max found from dataset in order to normalize data using whole data known
+                if mode == 'svd':
+
+                    current_min = data.min()
+                    current_max = data.max()
+
+                    if current_min < min_val_found:
+                        min_val_found = current_min
+
+                    if current_max > max_val_found:
+                        max_val_found = current_max
+
+                # now write data into current writer
+                current_file = svd_output_files[id_block]
+
+                # add of index
+                current_file.write(current_image_postfix + ';')
+
+                for val in data:
+                    current_file.write(str(val) + ";")
+
+                current_file.write('\n')
+
+            print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((id_img + 1) / number_scene_image * 100.) + "%")
+            sys.stdout.write("\033[F")
+
+        for f in svd_output_files:
+            f.close()
+
+        print('\n')
+
+    # save current information about min file found
+    if mode == 'svd':
+        with open(data_min_max_filename, 'w') as f:
+            f.write(str(min_val_found) + '\n')
+            f.write(str(max_val_found) + '\n')
+
+    print("%s_%s : end of data generation\n" % (data_type, mode))
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Compute and prepare data of feature of all scenes (keep in memory min and max value found)")
+
+   
+    parser.add_argument('--feature', type=str, 
+                                    help="feature choice in order to compute data (use 'all' if all features are needed)")
+
+    args = parser.parse_args()
+
+    p_feature = args.feature
+
+    # generate all or specific feature data
+    if p_feature == 'all':
+        for m in features_choices:
+            generate_data_svd(m, 'svd')
+            generate_data_svd(m, 'svdn')
+            generate_data_svd(m, 'svdne')
+    else:
+
+        if p_feature not in features_choices:
+            raise ValueError('Unknown feature choice {0}, expected one of {1}'.format(p_feature, features_choices))
+            
+        generate_data_svd(p_feature, 'svd')
+        generate_data_svd(p_feature, 'svdn')
+        generate_data_svd(p_feature, 'svdne')
+
+if __name__== "__main__":
+    main()

+ 272 - 0
generate/generate_data_model.py

@@ -0,0 +1,272 @@
+# main imports
+import sys, os, argparse
+import numpy as np
+import pandas as pd
+import random
+
+# image processing imports
+from PIL import Image
+
+from ipfml import utils
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+from data_attributes import get_image_features
+
+
+# getting configuration information
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes variables
+scenes_list             = cfg.scenes_names
+scenes_indexes          = cfg.scenes_indices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+renderer_choices        = cfg.renderer_choices
+normalization_choices   = cfg.normalization_choices
+features_choices        = cfg.features_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+zones_indices           = cfg.zones_indices
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
+def construct_new_line(path_seuil, interval, line, choice, each, norm):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    features = line_data[begin+1:end+1]
+
+    features = [float(m) for id, m in enumerate(features) if id % each == 0 ]
+
+    if norm:
+        if choice == 'svdne':
+            features = utils.normalize_arr_with_range(features, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            features = utils.normalize_arr(features)
+
+    with open(path_seuil, "r") as seuil_file:
+        seuil_learned = int(seuil_file.readline().strip())
+
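+    # label 1 (noisy) while the image quality index is below the human threshold, 0 otherwise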
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for val in features:
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _interval, _feature):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for folder_scene in scenes:
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for zone_folder in zones_folder:
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _feature + "_svd" + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # scan each line to update the min and max values found over the interval
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    features = line_data[begin+1:end+1]
+                    features = [float(m) for m in features]
+
+                    min_value = min(features)
+                    max_value = max(features)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_filename, _interval, _choice, _feature, _scenes = scenes_list, _zones = zones_indices, _percent = 1, _step=1, _each=1, _norm=False, _custom=False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for folder_scene in scenes_list:
+
+        # only take care of maxwell scenes
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = zones
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in _zones:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _feature + "_svd" + generic_output_file_svd
+            else:
+                data_filename = _feature + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            lines_indexes = np.arange(num_lines)
+            random.shuffle(lines_indexes)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for index in lines_indexes:
+
+                image_index = int(lines[index].split(';')[0])
+                percent = counter / num_lines
+
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)
+
+                    if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
+                        train_file.write(line)
+                    else:
+                        test_file.write(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--zones', type=str, help='Zones indices to use for training data set')
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)', default=1.0)
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_feature  = args.feature
+    p_scenes   = args.scenes.split(',')
+    p_zones    = list(map(int, args.zones.split(',')))
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_feature)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(p_filename, p_interval, p_kind, p_feature, scenes_selected, p_zones, p_percent, p_step, p_each, p_custom)
+
+if __name__== "__main__":
+    main()

+ 299 - 0
generate/generate_data_model_random_all.py

@@ -0,0 +1,299 @@
+# main imports
+import sys, os, argparse
+import numpy as np
+import pandas as pd
+import random
+
+# image processing imports
+from PIL import Image
+
+from ipfml import utils
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+from data_attributes import get_image_features
+
+
+# getting configuration information
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes variables
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+renderer_choices        = cfg.renderer_choices
+features_choices        = cfg.features_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+
+def construct_new_line(path_seuil, interval, line, choice, each, norm):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    features = line_data[begin+1:end+1]
+
+    # keep only every `each`-th value
+    features = [float(m) for id, m in enumerate(features) if id % each == 0]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+
+        if choice == 'svdne':
+            features = utils.normalize_arr_with_range(features, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            features = utils.normalize_arr(features)
+
+    with open(path_seuil, "r") as seuil_file:
+        seuil_learned = int(seuil_file.readline().strip())
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for val in features:
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+
+def get_min_max_value_interval(_scenes_list, _interval, _feature):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for folder_scene in scenes:
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for zone_folder in zones_folder:
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _feature + "_svd"+ generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # scan each line to update the min and max values found over the interval
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+
+                    features = line_data[begin+1:end+1]
+                    features = [float(m) for m in features]
+
+                    min_value = min(features)
+                    max_value = max(features)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _feature, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for folder_scene in _scenes_list:
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = list(zones)  # copy so the shared config array is not shuffled in place
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _feature + "_svd" + generic_output_file_svd
+            else:
+                data_filename = _feature + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # read all lines; the count is used to compute the train/test percentage
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            counter = 0
+            # dispatch each kept line to the train or test set depending on scene / zone selection
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
+
+                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                        train_file_data.append(line)
+                    else:
+                        test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval of svd values to keep, e.g. "0, 200"', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for the training data set', default=4)
+    parser.add_argument('--random', type=int, help='Shuffle zones and lines randomly (1) or keep original order (0)', choices=[0, 1], default=0)
+    parser.add_argument('--percent', type=float, help='Fraction of each zone used for the train dataset (default 1)', default=1)
+    parser.add_argument('--step', type=int, help='Image index step between kept samples when building datasets', default=1)
+    parser.add_argument('--each', type=int, help='Keep only every nth feature value from the interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of the custom min/max file when data is renormalized', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_feature  = args.feature
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_feature)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_filename_path = os.path.join(custom_min_max_folder, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_feature, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
+
+if __name__ == "__main__":
+    main()
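+
+
+# Example invocation (a sketch only: the output name, interval, scene indices and
+# other values below are illustrative assumptions, not a prescribed configuration):
+#
+#   python generate/generate_data_model_random_all.py \
+#       --output data/example_dataset --interval "0, 200" --kind svdne \
+#       --feature lab --scenes "A, D, G, H" --nb_zones 10 --random 1 \
+#       --percent 1 --step 10 --each 1 --renderer maxwell --custom example_min_max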

+ 310 - 0
generate/generate_data_model_random_center.py

@@ -0,0 +1,310 @@
+# main imports
+import sys, os, argparse
+import numpy as np
+import pandas as pd
+import random
+
+# image processing imports
+from PIL import Image
+
+from ipfml import utils
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+from data_attributes import get_image_features
+
+
+# getting configuration information
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes variables
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+renderer_choices        = cfg.renderer_choices
+features_choices        = cfg.features_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
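+# maximum allowed gap between an image index and its zone threshold for a sample to be kept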
+abs_gap_data            = 150
+
+
+def construct_new_line(seuil_learned, interval, line, choice, each, norm):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    features = line_data[begin+1:end+1]
+
+    # keep only one value out of every `each` (indices where index % each == 0)
+    features = [float(m) for id, m in enumerate(features) if id % each == 0]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+
+        if choice == 'svdne':
+            features = utils.normalize_arr_with_range(features, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            features = utils.normalize_arr(features)
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for val in features:
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _interval, _feature):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for folder_scene in scenes:
+
+        # only process scenes that belong to the selected scenes list
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for zone_folder in zones_folder:
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # always use the raw svd values here (not already normalized)
+                data_filename = _feature + "_svd" + generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # read all svd lines of the zone
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # update the global min / max values over the selected interval
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+
+                    features = line_data[begin+1:end+1]
+                    features = [float(m) for m in features]
+
+                    min_value = min(features)
+                    max_value = max(features)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _feature, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if '/' not in output_train_filename:
+        raise Exception("Please provide a filename that includes a directory path, e.g. data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for folder_scene in _scenes_list:
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = list(zones)  # copy so the shared config array is not shuffled in place
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _feature + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _feature + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # read all lines; the count is used to compute the train/test percentage
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            with open(path_seuil, "r") as seuil_file:
+                seuil_learned = int(seuil_file.readline().strip())
+
+            counter = 0
+            # dispatch each kept line to the train or test set depending on scene / zone selection
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+
+                    # the zone threshold was already read before the loop (path_seuil does not change per line)
+                    gap_threshold = abs(seuil_learned - image_index)
+
+                    # only keep data whose image index is near the zone threshold
+                    if gap_threshold <= abs_gap_data:
+
+                        line = construct_new_line(seuil_learned, _interval, data, _choice, _each, _custom)
+
+                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                            train_file_data.append(line)
+                        else:
+                            test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval of svd values to keep, e.g. "0, 200"', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for the training data set', default=4)
+    parser.add_argument('--random', type=int, help='Shuffle zones and lines randomly (1) or keep original order (0)', choices=[0, 1], default=0)
+    parser.add_argument('--percent', type=float, help='Fraction of each zone used for the train dataset (default 1)', default=1)
+    parser.add_argument('--step', type=int, help='Image index step between kept samples when building datasets', default=1)
+    parser.add_argument('--each', type=int, help='Keep only every nth feature value from the interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of the custom min/max file when data is renormalized', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_feature  = args.feature
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_feature)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_filename_path = os.path.join(custom_min_max_folder, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_feature, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
+
+if __name__ == "__main__":
+    main()

+ 309 - 0
generate/generate_data_model_random_split.py

@@ -0,0 +1,309 @@
+# main imports
+import sys, os, argparse
+import numpy as np
+import pandas as pd
+import random
+
+# image processing imports
+from PIL import Image
+
+from ipfml import utils
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+from data_attributes import get_image_features
+
+
+# getting configuration information
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes variables
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+renderer_choices        = cfg.renderer_choices
+features_choices        = cfg.features_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
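+# samples whose image index is closer than this to the zone threshold are discarded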
+abs_gap_data            = 100
+
+
+def construct_new_line(seuil_learned, interval, line, choice, each, norm):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    features = line_data[begin+1:end+1]
+
+    # keep only one value out of every `each` (indices where index % each == 0)
+    features = [float(m) for id, m in enumerate(features) if id % each == 0]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+
+        if choice == 'svdne':
+            features = utils.normalize_arr_with_range(features, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            features = utils.normalize_arr(features)
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for val in features:
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _interval, _feature):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for folder_scene in scenes:
+
+        # only process scenes that belong to the selected scenes list
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for zone_folder in zones_folder:
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # always use the raw svd values here (not already normalized)
+                data_filename = _feature + "_svd" + generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # read all svd lines of the zone
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # update the global min / max values over the selected interval
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+
+                    features = line_data[begin+1:end+1]
+                    features = [float(m) for m in features]
+
+                    min_value = min(features)
+                    max_value = max(features)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _feature, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if '/' not in output_train_filename:
+        raise Exception("Please provide a filename that includes a directory path, e.g. data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for folder_scene in _scenes_list:
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = list(zones)  # copy so the shared config array is not shuffled in place
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _feature + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _feature + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # read all lines; the count is used to compute the train/test percentage
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            with open(path_seuil, "r") as seuil_file:
+                seuil_learned = int(seuil_file.readline().strip())
+
+            counter = 0
+            # dispatch each kept line to the train or test set depending on scene / zone selection
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+
+                    # the zone threshold was already read before the loop (path_seuil does not change per line)
+                    gap_threshold = abs(seuil_learned - image_index)
+
+                    # only keep data whose image index is far from the zone threshold
+                    if gap_threshold > abs_gap_data:
+
+                        line = construct_new_line(seuil_learned, _interval, data, _choice, _each, _custom)
+
+                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                            train_file_data.append(line)
+                        else:
+                            test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval of svd values to keep, e.g. "0, 200"', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for the training data set', default=4)
+    parser.add_argument('--random', type=int, help='Shuffle zones and lines randomly (1) or keep original order (0)', choices=[0, 1], default=0)
+    parser.add_argument('--percent', type=float, help='Fraction of each zone used for the train dataset (default 1)', default=1)
+    parser.add_argument('--step', type=int, help='Image index step between kept samples when building datasets', default=1)
+    parser.add_argument('--each', type=int, help='Keep only every nth feature value from the interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of the custom min/max file when data is renormalized', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_feature  = args.feature
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_feature)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_filename_path = os.path.join(custom_min_max_folder, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_feature, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
+
+if __name__ == "__main__":
+    main()

+ 95 - 0
models.py

@@ -0,0 +1,95 @@
+# models imports
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.feature_selection import RFECV
+import sklearn.svm as svm
+
+
+def _get_best_model(X_train, y_train):
+
+    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
+    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
+    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
+
+    svc = svm.SVC(probability=True)
+    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=0)
+
+    clf.fit(X_train, y_train)
+
+    model = clf.best_estimator_
+
+    return model
+
+def svm_model(X_train, y_train):
+
+    return _get_best_model(X_train, y_train)
+
+
+def ensemble_model(X_train, y_train):
+
+    svm_model = _get_best_model(X_train, y_train)
+
+    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
+    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
+
+    ensemble_model = VotingClassifier(estimators=[
+       ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)], voting='soft', weights=[1,1,1])
+
+    ensemble_model.fit(X_train, y_train)
+
+    return ensemble_model
+
+
+def ensemble_model_v2(X_train, y_train):
+
+    svm_model = _get_best_model(X_train, y_train)
+    knc_model = KNeighborsClassifier(n_neighbors=2)
+    gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
+    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
+    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
+
+    ensemble_model = VotingClassifier(estimators=[
+       ('lr', lr_model),
+       ('knc', knc_model),
+       ('gbc', gbc_model),
+       ('svm', svm_model),
+       ('rf', rf_model)],
+       voting='soft', weights=[1, 1, 1, 1, 1])
+
+    ensemble_model.fit(X_train, y_train)
+
+    return ensemble_model
+
+def rfe_svm_model(X_train, y_train, n_components=1):
+
+    # Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
+    # gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
+    # param_grid = [{'estimator__C': Cs, 'estimator__gamma' : gammas}]
+
+    gammas = [0.001, 0.01, 0.1]
+    param_grid = [{'estimator__gamma' : gammas}]
+
+    estimator = svm.SVC(kernel="linear")
+    selector = RFECV(estimator, step=1, cv=4, verbose=0)
+    clf = GridSearchCV(selector, param_grid, cv=5, verbose=1)
+    clf.fit(X_train, y_train)
+
+    return clf.best_estimator_
+
+
+def get_trained_model(choice, X_train, y_train):
+
+    if choice == 'svm_model':
+        return svm_model(X_train, y_train)
+
+    if choice == 'ensemble_model':
+        return ensemble_model(X_train, y_train)
+
+    if choice == 'ensemble_model_v2':
+        return ensemble_model_v2(X_train, y_train)
+
+    if choice == 'rfe_svm_model':
+        return rfe_svm_model(X_train, y_train)

+ 5 - 0
modules/.gitignore

@@ -0,0 +1,5 @@
+# IDE folder
+.vscode
+
+# python cache
+__pycache__

+ 8 - 0
modules/LICENSE

@@ -0,0 +1,8 @@
+MIT License
+Copyright (c) 2019 prise-3d
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ 42 - 0
modules/README.md

@@ -0,0 +1,42 @@
+# Thesis common modules
+
+## Description
+
+Project containing common dependencies used in a few related projects:
+
+- [Noise Detection CNN](https://github.com/prise-3d/Thesis-NoiseDetection-CNN.git)
+- [Denoising autoencoder](https://github.com/prise-3d/Thesis-Denoising-autoencoder.git)
+- [Noise Detection attributes](https://github.com/prise-3d/Thesis-NoiseDetection-attributes.git)
+- [Noise Detection 26 attributes](https://github.com/prise-3d/Thesis-NoiseDetection-26-attributes.git)
+- [Noise Analysis](https://github.com/prise-3d/Thesis-NoiseAnalysis.git)
+
+## Configuration file
+
+There are a few configuration files (in the `config` folder); a usage sketch follows the list:
+- **global:** contains the common variables of the project
+- **attributes:** extends the global config with attribute-specific variables
+- **cnn:** extends the global config with deep-learning-specific variables
+
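+A consuming project usually exposes one of them through its own `custom_config.py`
+(a minimal sketch; the override below is illustrative, not required):
+
+```python
+# custom_config.py of the consuming project
+from modules.config.attributes_config import *
+
+# project-specific additions / overrides
+output_data_folder = 'data'
+```
+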
+## Add as dependency
+
+```bash
+git submodule add https://github.com/prise-3d/Thesis-CommonModules.git modules
+```
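+
+Then, after cloning a project that already declares this submodule, fetch it with:
+
+```bash
+git submodule update --init --recursive
+```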
+
+## Dataset information
+
+| ID | Name | Renderer | Number of Images |
+|:---:|---:|---:|---:|
+| A | Appart1opt02 | maxwell | 89 |
+| B | Bureau1 | igloo | 200 |
+| C | Cendrier | | 25 |
+| D | Cuisine01 | maxwell | 116 |
+| E | EchecsBas | cycle| 200 |
+| F | PNDVuePlongeante | igloo | 800 |
+| G | SdbCentre | maxwell | 94 |
+| H | SdbDroite | maxwell | 94 |
+| I | Selles | cycle | 62 |
+
+## License
+
+[The MIT License](LICENSE)

+ 0 - 0
modules/__init__.py


+ 90 - 0
modules/classes/Transformation.py

@@ -0,0 +1,90 @@
+# main imports
+import os
+import numpy as np
+
+# image processing imports
+from ipfml.processing import transform
+from ipfml.processing import reconstruction
+from ipfml.filters import convolution, kernels
+from ipfml import utils
+
+from PIL import Image
+
+
+# Transformation class storing an image transformation method and exposing useful information about it
+class Transformation():
+
+    def __init__(self, _transformation, _param, _size):
+        self.transformation = _transformation
+        self.param = _param
+        self.size = _size
+
+    def getTransformedImage(self, img):
+
+        if self.transformation == 'svd_reconstruction':
+            begin, end = list(map(int, self.param.split(',')))
+            data = reconstruction.svd(img, [begin, end])
+
+        if self.transformation == 'ipca_reconstruction':
+            n_components, batch_size = list(map(int, self.param.split(',')))
+            data = reconstruction.ipca(img, n_components, batch_size)
+
+        if self.transformation == 'fast_ica_reconstruction':
+            n_components = self.param
+            data = reconstruction.fast_ica(img, n_components)
+
+        if self.transformation == 'min_diff_filter':
+            w_size, h_size = list(map(int, self.param.split(',')))
+            h, w = list(map(int, self.size.split(',')))
+
+            # min bilateral difference filter with window of size (`w_size`, `h_size`)
+            lab_img = transform.get_LAB_L(img)
+
+            lab_img = Image.fromarray(lab_img)
+            lab_img.thumbnail((h, w))
+
+            diff_img = convolution.convolution2D(lab_img, kernels.min_bilateral_diff, (w_size, h_size))
+
+            data = np.array(diff_img*255, 'uint8')
+            
+        if self.transformation == 'static':
+            # static content, we keep input as it is
+            data = img
+
+        return data
+    
+    def getTransformationPath(self):
+
+        path = self.transformation
+
+        if self.transformation == 'svd_reconstruction':
+            begin, end = list(map(int, self.param.split(',')))
+            path = os.path.join(path, str(begin) + '_' + str(end))
+
+        if self.transformation == 'ipca_reconstruction':
+            n_components, batch_size = list(map(int, self.param.split(',')))
+            path = os.path.join(path, 'N' + str(n_components) + '_' + str(batch_size))
+
+        if self.transformation == 'fast_ica_reconstruction':
+            n_components = self.param
+            path = os.path.join(path, 'N' + str(n_components))
+
+        if self.transformation == 'min_diff_filter':
+            w_size, h_size = list(map(int, self.param.split(',')))
+            w, h = list(map(int, self.size.split(',')))
+            path = os.path.join(path, 'W_' + str(w_size) + '_' + str(h_size) + '_S_' + str(w) + '_' + str(h))
+
+        if self.transformation == 'static':
+            # param contains image name to find for each scene
+            path = self.param
+
+        return path
+
+    def getName(self):
+        return self.transformation
+
+    def getParam(self):
+        return self.param
+
+    def __str__( self ):
+        return self.transformation + ' transformation with parameter : ' + self.param
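+
+
+# Usage sketch (kept as a comment; the image path and parameters below are
+# illustrative assumptions):
+#
+#   img = np.array(Image.open('dataset/Appart1opt02/zone00/example.png'))
+#   t = Transformation('svd_reconstruction', '100, 200', '200, 200')
+#   reduced = t.getTransformedImage(img)      # reconstruction from singular values 100..200
+#   print(t.getTransformationPath())          # -> svd_reconstruction/100_200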

+ 0 - 0
modules/classes/__init__.py


+ 0 - 0
modules/config/__init__.py


+ 19 - 0
modules/config/attributes_config.py

@@ -0,0 +1,19 @@
+from .global_config import *
+
+# store all variables from global config
+context_vars = vars()
+
+# folders
+min_max_custom_folder           = 'custom_norm'
+correlation_indices_folder      = 'corr_indices'
+
+# variables
+features_choices_labels         = ['lab', 'mscn', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2', 'sub_blocks_stats', 'sub_blocks_area', 'sub_blocks_stats_reduced', 'sub_blocks_area_normed', 'mscn_var_4', 'mscn_var_16', 'mscn_var_64', 'mscn_var_16_max', 'mscn_var_64_max', 'ica_diff', 'svd_trunc_diff', 'ipca_diff', 'svd_reconstruct', 'highest_sv_std_filters', 'lowest_sv_std_filters', 'highest_wave_sv_std_filters', 'lowest_wave_sv_std_filters']
+
+models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
+normalization_choices           = ['svd', 'svdn', 'svdne']
+
+# parameters
+keras_epochs                    = 500
+keras_batch                     = 32
+val_dataset_size                = 0.2

+ 21 - 0
modules/config/cnn_config.py

@@ -0,0 +1,21 @@
+from .global_config import *
+
+# store all variables from global config
+context_vars = vars()
+
+# folders
+noisy_folder                    = 'noisy'
+not_noisy_folder                = 'notNoisy'
+
+# file or extensions
+post_image_name_separator       = '___'
+
+# variables
+features_choices_labels         = ['static', 'svd_reconstruction', 'fast_ica_reconstruction', 'ipca_reconstruction']
+
+# parameters
+keras_epochs                    = 30
+keras_batch                     = 32
+val_dataset_size                = 0.2
+
+keras_img_size                  = (200, 200)

+ 38 - 0
modules/config/global_config.py

@@ -0,0 +1,38 @@
+import numpy as np
+
+# folders
+zone_folder                     = 'zone'
+output_data_folder              = 'data'
+dataset_path                    = 'dataset'
+threshold_map_folder            = 'threshold_map'
+models_information_folder       = 'models_info'
+results_information_folder      = 'results'
+saved_models_folder             = 'saved_models'
+min_max_custom_folder           = 'custom_norm'
+learned_zones_folder            = 'learned_zones'
+
+# files or extensions
+csv_model_comparisons_filename  = 'models_comparisons.csv'
+seuil_expe_filename             = 'seuilExpe'
+min_max_filename_extension      = '_min_max_values'
+
+# variables 
+renderer_choices                = ['all', 'maxwell', 'igloo', 'cycle']
+
+scenes_names                    = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
+scenes_indices                  = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
+
+maxwell_scenes_names            = ['Appart1opt02', 'Cuisine01', 'SdbCentre', 'SdbDroite']
+maxwell_scenes_indices          = ['A', 'D', 'G', 'H']
+
+igloo_scenes_names              = ['Bureau1', 'PNDVuePlongeante']
+igloo_scenes_indices            = ['B', 'F']
+
+cycle_scenes_names              = ['EchecsBas', 'Selles']
+cycle_scenes_indices            = ['E', 'I']
+
+zones_indices                   = np.arange(16)
+
+# parameters
+scene_image_quality_separator     = '_'
+scene_image_extension             = '.png'

+ 34 - 0
modules/oar/README.md

@@ -0,0 +1,34 @@
+# Run on Calculco
+
+## Preparation of project
+
+```
+cd ~/projects
+git clone https://%PROJET_NAME%.git %PROJET_NAME%
+```
+
+Push your `dataset` into `/scratch/orvalXX/lisic/user/data`:
+```
+cp -r %PROJET_NAME% /scratch/orvalXX/lisic/user/%PROJET_NAME%
+```
+
+## Link data
+
+Create a symbolic link to the project's `dataset`:
+```
+ln -s /scratch/orvalXX/lisic/user/data/%PROJET_NAME% dataset
+```
+
+Create all the useful symbolic links for the project:
+```
+bash modules/oar/generate_symlinks.sh orvalXX lisic/user/projects/%PROJET_NAME%
+```
+
+**Note:** `modules` is the name under which this project is added as a submodule in your own project.
+
+## Run script
+
+Create your `oar.sh` script based on `oar.example.sh` and run it:
+```
+oarsub -S oar.sh
+```

+ 39 - 0
modules/oar/generate_symlinks.sh

@@ -0,0 +1,39 @@
+#! /bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need to specify orval you want to use (in /scratch folder)"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need to specify where you want to store data"
+    exit 1
+fi
+
+
+echo "Creating links into /scratch folder"
+
+scratch="/scratch"
+orval=$1
+path=$2
+
+
+for link in {"data","results","logs","saved_models","models_info","models_backup","threshold_map","learned_zones","custom_norm"}; do
+    
+    if [ -L ${link} ]; then
+        rm ${link}
+    fi
+    
+    fullpath=${scratch}/${orval}/${path}/${link}
+
+    if [ ! -d "${fullpath}" ]; then
+        mkdir -p ${fullpath}
+    fi
+    
+    # remove `orval` name for running part
+    ln -s ${scratch}/${path}/${link} ${link}
+done

+ 15 - 0
modules/oar/oar.example.sh

@@ -0,0 +1,15 @@
+#!/bin/sh
+
+#OAR --array-param-file params.txt
+#OAR -l /nodes=1,walltime=6:00:00
+#OAR -p host="orval02"
+#OAR -t besteffort
+#OAR --notify mail:jerome.buisine@univ-littoral.fr
+#OAR -O /nfs/home/lisic/jbuisine/projects/launchers/logs/Thesis-NoiseDetection-CNN.%jobid%.out
+#OAR -E /nfs/home/lisic/jbuisine/projects/launchers/logs/Thesis-NoiseDetection-CNN.%jobid%.err
+
+# Activate the venv used by python
+. ~/opt/venvs/thesis-venv/bin/activate
+
+# run command
+python ~/projects/Thesis-NoiseDetection-CNN/generate/generate_reconstructed_data.py $@

+ 5 - 0
modules/requirements.txt

@@ -0,0 +1,5 @@
+numpy
+ipfml
+sklearn
+Pillow

+ 0 - 0
modules/utils/__init__.py


+ 82 - 0
modules/utils/data.py

@@ -0,0 +1,82 @@
+import os
+
+from PIL import Image
+
+from ..config.cnn_config import *
+
+
+_scenes_names_prefix   = '_scenes_names'
+_scenes_indices_prefix = '_scenes_indices'
+
+# store all variables from current module context
+context_vars = vars()
+
+def get_renderer_scenes_indices(renderer_name):
+
+    if renderer_name not in renderer_choices:
+        raise ValueError("Unknown renderer name")
+
+    if renderer_name == 'all':
+        return scenes_indices
+    else:
+        return context_vars[renderer_name + _scenes_indices_prefix]
+
+def get_renderer_scenes_names(renderer_name):
+
+    if renderer_name not in renderer_choices:
+        raise ValueError("Unknown renderer name")
+
+    if renderer_name == 'all':
+        return scenes_names
+    else:
+        return context_vars[renderer_name + _scenes_names_prefix]
+
+