Browse source code

Merge branch 'release/v0.1.8'

Jérôme BUISINE 5 years ago
commit 3b1038068e
38 files changed, with 927 additions and 1267 deletions
  1. +1 -0     .gitignore
  2. +18 -16   README.md
  3. +1 -1     cnn_keras_svd.py
  4. +2 -2     display_bits_shifted_scene.py
  5. +13 -13   display_scenes_zones.py
  6. +4 -4     display_scenes_zones_shifted.py
  7. +4 -4     display_svd_zone_scene.py
  8. +1 -1     generateAndTrain_maxwell.sh
  9. +74 -0    generateAndTrain_maxwell_custom.sh
  10. +4 -4    generate_all_data.py
  11. +3 -3    generate_data_model.py
  12. +277 -0  generate_data_model_r.py
  13. +137 -44 generate_data_model_random.py
  14. +30 -15  generate_data_model_random_maxwell.py
  15. +0 -77   metrics_predictions/predict_noisy_image_svd_lab.py
  16. +0 -78   metrics_predictions/predict_noisy_image_svd_low_bits_2.py
  17. +0 -78   metrics_predictions/predict_noisy_image_svd_low_bits_3.py
  18. +0 -78   metrics_predictions/predict_noisy_image_svd_low_bits_4.py
  19. +0 -78   metrics_predictions/predict_noisy_image_svd_low_bits_4_shifted_2.py
  20. +0 -78   metrics_predictions/predict_noisy_image_svd_low_bits_5.py
  21. +0 -78   metrics_predictions/predict_noisy_image_svd_low_bits_6.py
  22. +0 -86   metrics_predictions/predict_noisy_image_svd_mscn.py
  23. +0 -88   metrics_predictions/predict_noisy_image_svd_mscn_revisited.py
  24. +0 -167  models/ensemble_model_train.py
  25. +0 -174  models/ensemble_model_v2_train.py
  26. +75 -0   modules/models.py
  27. +5 -4    modules/utils/config.py
  28. +11 -22  modules/utils/data_type.py
  29. +102 -0  predict_noisy_image_svd.py
  30. +19 -10  predict_seuil_expe.py
  31. +19 -11  predict_seuil_expe_maxwell.py
  32. +25 -15  predict_seuil_expe_maxwell_curve.py
  33. +2 -3    run_maxwell_simulation.sh
  34. +63 -0   run_maxwell_simulation_custom.sh
  35. +1 -1    save_model_result_in_md.py
  36. +3 -2    save_model_result_in_md_maxwell.py
  37. +8 -1    testModelByScene_maxwell.sh
  38. +25 -31  models/svm_model_train.py

+ 1 - 0
.gitignore

@@ -3,6 +3,7 @@ data/*
 saved_models/*
 threshold_map/*
 models_info/*
+custom_norm/*
 
 simulate_models.csv
 

+ 18 - 16
README.md

@@ -26,13 +26,15 @@ You can also specify metric you want to compute and image step to avoid some ima
 python generate_all_data.py --metric mscn --step 50
 ```
 
+- **step** : keep an image only if its id % 50 == 0 (the assumption is that keeping spaced images helps the model fit better).
+
 ## How to use
 
 ### Multiple folders and scripts are available:
 
 
 - **fichiersSVD_light/\*** : all scene files information (zones of each scene, SVD descriptor files information and so on...).
-- **models/*.py** : all models developed to predict noise in image.
+- **train_model.py** : script used to train one of the available models.
 - **data/\*** : folder which will contain all *.train* & *.test* files in order to train model.
 - **saved_models/*.joblib** : all scikit learn models saved.
 - **models_info/*** : all markdown files generated to get quick information about model performance and prediction. This folder contains also **model_comparisons.csv** obtained after running runAll_maxwell.sh script.
@@ -51,7 +53,7 @@ Two scripts can be used for generating data in order to fit model :
 ```
 python generate_data_model.py --help
 
-python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --zones "0, 1, 2" --percent 0.7 --sep : --rowindex 1
+python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --zones "0, 1, 2" --percent 0.7 --sep : --rowindex 1 --custom custom_min_max_filename
 ```
 
 Parameters explained :
@@ -63,37 +65,36 @@ Parameters explained :
 - **percent** : percent of each zone's data to take (chosen randomly)
 - **sep** : separator used for the output csv file
 - **rowindex** : if 1 then each row will look like 1:xxxxx, 2:xxxxxx, ..., n:xxxxxx
+- **custom** : specify if you want your data normalized using the interval rather than the whole singular values vector. If so, the value of this parameter is the output filename which will store the min and max values found. This file will be useful later to make predictions with the model (optional parameter).
 
 ### Train model
 
 This is an example of how to train a model
 
 ```bash
-python models/xxxxx.py --data 'data/xxxxx.train' --output 'model_file_to_save'
+python train_model.py --data 'data/xxxxx.train' --output 'model_file_to_save' --choice 'model_choice'
 ```
 
+Expected values for the **choice** parameter are ['svm_model', 'ensemble_model', 'ensemble_model_v2'].
+
 ### Predict image using model
 
 Now we have a model trained, we can use it with an image as input :
 
 ```bash
-python metrics_predictions/predict_noisy_image_svd_lab.py --image path/to/image.png --interval "x,x" --model saved_models/xxxxxx.joblib --mode 'svdn'
+python predict_noisy_image_svd.py --image path/to/image.png --interval "x,x" --model saved_models/xxxxxx.joblib --metric 'lab' --mode 'svdn' --custom 'min_max_filename'
 ```
 
+- **metric** : the metric choice needs to be one of those listed above.
+- **custom** : specify the filename with the custom min and max values of your data interval. This file was generated using the **custom** parameter of one of the **generate_data_model\*.py** scripts (optional parameter).
+
 The model will return only 0 or 1:
 - 1 means a noisy image is detected.
 - 0 means the image does not seem to be noisy.
 
-You can also use other specific metric
-
-```bash
-python metrics_predictions/predict_noisy_image_svd_mscn.py --image path/to/image.png --interval "x,x" --model saved_models/xxxxxx.joblib --mode 'svdn'
-```
-
-All SVD metrics you developed need :
-- Name added into *metric_choices* global array variable of **generate_all_data.py** file.
-- A specification of how you compute the metric into generate_data_svd method of **generate_all_data.py** file.
-- A prediction script into **metrics_predictions** folder. Name need to follow this rule : *predict_noisy_image_svd_xxxx.py*
+All SVD metrics developed need:
+- Their name added to the *metric_choices_labels* global array variable of the **modules/utils/config.py** file.
+- A specification of how the metric is computed in the *get_svd_data* method of the **modules/utils/data_type.py** file.
 
 ### Predict scene using model
 
@@ -112,7 +113,7 @@ Just use --help option to get more information.
 
 ### Simulate model on scene
 
-All scripts named **predict_seuil_expe\*.py** are used to simulate model prediction during rendering process.
+All scripts named **predict_seuil_expe\*.py** are used to simulate model prediction during the rendering process. Do not forget the **custom** parameter filename if necessary.
 
 Once the simulation is done, check your **threshold_map/%MODEL_NAME%/simulation\_curves\_zones\_\*/** folder and explore it with the help of the **display_simulation_curves.py** script.
 
@@ -139,7 +140,7 @@ Parameters list :
 The main objective of this project is to predict, as well as a human, the noise perception on a photorealistic image. The human threshold is available from the training data, so a script was developed to give the predicted threshold from the model and compare it with the expected one.
 
 ```bash
-python predict_seuil_expe.py --interval "x,x" --model 'saved_models/xxxx.joblib' --mode ["svd", "svdn", "svdne"] --metric ['lab', 'mscn', ...] --limit_detection xx
+python predict_seuil_expe.py --interval "x,x" --model 'saved_models/xxxx.joblib' --mode ["svd", "svdn", "svdne"] --metric ['lab', 'mscn', ...] --limit_detection xx --custom 'custom_min_max_filename'
 ```
 
 Parameters list :
@@ -147,6 +148,7 @@ Parameters list :
 - **interval** : the interval of data you want to use from SVD vector.
 - **mode** : kind of data ['svd', 'svdn', 'svdne']; not normalize, normalize vector only and normalize together.
 - **limit_detection** : number of not noisy images found to stop and return threshold (integer).
+- **custom** : custom filename where min and max values are stored (optional parameter).
 
 ### Display model performance information
 
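Two quick illustrations of the README changes above. First, the new **step** option's filtering rule, as a minimal sketch (a hypothetical helper; the real logic lives inside generate_all_data.py):

```python
# Minimal sketch of the --step rule described above (hypothetical helper;
# the real filtering lives inside generate_all_data.py).
def keep_image(image_id, step=50):
    # keep only images whose index is a multiple of `step`, so that the
    # retained samples are evenly spaced along the rendering process
    return int(image_id) % step == 0

print([i for i in range(0, 200, 10) if keep_image(i)])  # [0, 50, 100, 150]
```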

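Second, the **custom** min/max workflow for prediction, pieced together from the calls visible in this commit. This is a minimal sketch assuming a model trained on the 'lab' metric in 'svdne' mode; the min/max file holds two lines (min, then max), as written by generate_data_model_r.py, and all paths are placeholders:

```python
# Sketch only: normalize an SVD feature vector with a custom min/max file,
# then predict with a saved model. Paths are placeholders.
from sklearn.externals import joblib
from PIL import Image
from ipfml import processing

model = joblib.load('saved_models/xxxxxx.joblib')     # trained model
img = Image.open('path/to/image.png')

lab_l_svd = processing.get_LAB_L_SVD_s(img)           # 'lab' metric descriptor

# read the min/max values saved by the --custom option
with open('custom_norm/min_max_filename', 'r') as f:  # folder name from .gitignore
    min_val = float(f.readline())
    max_val = float(f.readline())

# 'svdne' mode: normalize using the interval-specific min/max
data = processing.normalize_arr_with_range(lab_l_svd, min_val, max_val)

begin, end = 0, 20                                    # --interval "0,20"
print(model.predict([data[begin:end]])[0])            # 1 = noisy, 0 = not noisy
```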
+ 1 - 1
cnn_keras_svd.py

@@ -10,7 +10,7 @@ from sklearn.utils import shuffle
 import numpy as np
 import pandas as pd
 
-from ipfml import image_processing
+from ipfml import processing
 from PIL import Image
 
 import sys, os, getopt

+ 2 - 2
display_bits_shifted_scene.py

@@ -14,7 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import image_processing
+from ipfml import processing
 from ipfml import metrics
 from skimage import color
 import matplotlib.pyplot as plt
@@ -135,7 +135,7 @@ def display_data_scenes(nb_bits, p_scene):
 
                     block_used = np.array(current_img)
 
-                    low_bits_block = image_processing.rgb_to_LAB_L_bits(block_used, (i + 1, i + nb_bits + 1))
+                    low_bits_block = processing.rgb_to_LAB_L_bits(block_used, (i + 1, i + nb_bits + 1))
                     low_bits_svd = metrics.get_SVD_s(low_bits_block)
                     low_bits_svd = [b / low_bits_svd[0] for b in low_bits_svd]
                     low_bits_svd_values[i].append(low_bits_svd)

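For reference, the loop body above condenses into one helper: shift past the lowest bits of the LAB L channel, keep a window of bits, take the SVD, and normalize by the first singular value. A sketch using the same ipfml calls:

```python
# Condensed restatement of the loop body above, for a single image block.
import numpy as np
from ipfml import processing, metrics

def shifted_low_bits_svd(block, shift, nb_bits):
    # keep `nb_bits` bits of the LAB L channel, starting after `shift` low bits
    low_bits_block = processing.rgb_to_LAB_L_bits(np.array(block),
                                                  (shift + 1, shift + nb_bits + 1))
    svd = metrics.get_SVD_s(low_bits_block)
    # normalize by the first (largest) singular value, as in the script above
    return [s / svd[0] for s in svd]
```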
+ 13 - 13
display_scenes_zones.py

@@ -14,7 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import image_processing
+from ipfml import processing
 from ipfml import metrics
 from skimage import color
 import matplotlib.pyplot as plt
@@ -120,7 +120,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                     img_path = os.path.join(scene_path, prefix_image_name + index + ".png")
 
                     current_img = Image.open(img_path)
-                    img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+                    img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
                     # getting expected block id
                     block = img_blocks[id_zone]
@@ -131,11 +131,11 @@ def display_data_scenes(data_type, p_scene, p_kind):
 
                         block_file_path = '/tmp/lab_img.png'
                         block.save(block_file_path)
-                        data = image_processing.get_LAB_L_SVD_s(Image.open(block_file_path))
+                        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
 
                     if data_type == 'mscn_revisited':
 
-                        img_mscn_revisited = image_processing.rgb_to_mscn(block)
+                        img_mscn_revisited = processing.rgb_to_mscn(block)
 
                         # save tmp as img
                         img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
@@ -149,8 +149,8 @@ def display_data_scenes(data_type, p_scene, p_kind):
                     if data_type == 'mscn':
 
                         img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
-                        img_mscn = image_processing.calculate_mscn_coefficients(img_gray, 7)
-                        img_mscn_norm = image_processing.normalize_2D_arr(img_mscn)
+                        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
+                        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
 
                         img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
 
@@ -158,7 +158,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
 
                     if data_type == 'low_bits_6':
 
-                        low_bits_6 = image_processing.rgb_to_LAB_L_low_bits(block, 63)
+                        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 63)
 
                         # extract from temp image
                         data = metrics.get_SVD_s(low_bits_6)
@@ -166,7 +166,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
 
                     if data_type == 'low_bits_5':
 
-                        low_bits_5 = image_processing.rgb_to_LAB_L_low_bits(block, 31)
+                        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 31)
 
                         # extract from temp image
                         data = metrics.get_SVD_s(low_bits_5)
@@ -174,21 +174,21 @@ def display_data_scenes(data_type, p_scene, p_kind):
 
                     if data_type == 'low_bits_4':
 
-                        low_bits_4 = image_processing.rgb_to_LAB_L_low_bits(block)
+                        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block)
 
                         # extract from temp image
                         data = metrics.get_SVD_s(low_bits_4)
 
                     if data_type == 'low_bits_3':
 
-                        low_bits_3 = image_processing.rgb_to_LAB_L_low_bits(block, 7)
+                        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 7)
 
                         # extract from temp image
                         data = metrics.get_SVD_s(low_bits_3)
 
                     if data_type == 'low_bits_2':
 
-                        low_bits_2 = image_processing.rgb_to_LAB_L_low_bits(block, 3)
+                        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 3)
 
                         # extract from temp image
                         data = metrics.get_SVD_s(low_bits_2)
@@ -200,7 +200,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                     # modify data depending mode
 
                     if p_kind == 'svdn':
-                        data = image_processing.normalize_arr(data)
+                        data = processing.normalize_arr(data)
 
                     if p_kind == 'svdne':
                         path_min_max = os.path.join(path, data_type + min_max_filename)
@@ -209,7 +209,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                             min_val = float(f.readline())
                             max_val = float(f.readline())
 
-                        data = image_processing.normalize_arr_with_range(data, min_val, max_val)
+                        data = processing.normalize_arr_with_range(data, min_val, max_val)
 
                     # append of data
                     images_data.append(data)

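The `low_bits_*` branches above all call the same helper with a different bit mask; by inspection the mask is simply `(1 << n) - 1` for n kept bits, and the 4-bit variant relies on the helper's default. A condensed sketch:

```python
# Condensed restatement of the low_bits_{2..6} branches above.
from ipfml import processing, metrics

def low_bits_svd(block, nb_bits):
    # mask keeping the nb_bits lowest bits: 3, 7, 31, 63 for 2, 3, 5 and 6 bits
    # (the 4-bit case above simply uses the helper's default mask)
    mask = (1 << nb_bits) - 1
    low_bits = processing.rgb_to_LAB_L_low_bits(block, mask)
    return metrics.get_SVD_s(low_bits)
```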
+ 4 - 4
display_scenes_zones_shifted.py

@@ -14,7 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import image_processing
+from ipfml import processing
 from ipfml import metrics
 from skimage import color
 import matplotlib.pyplot as plt
@@ -121,14 +121,14 @@ def display_data_scenes(p_scene, p_bits, p_shifted):
                     img_path = os.path.join(scene_path, prefix_image_name + index + ".png")
 
                     current_img = Image.open(img_path)
-                    img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+                    img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
                     # getting expected block id
                     block = img_blocks[id_zone]
 
                     # get data from mode
                     # Here you can add the way you compute data
-                    low_bits_block = image_processing.rgb_to_LAB_L_bits(block, (p_shifted + 1, p_shifted + p_bits + 1))
+                    low_bits_block = processing.rgb_to_LAB_L_bits(block, (p_shifted + 1, p_shifted + p_bits + 1))
                     data = metrics.get_SVD_s(low_bits_block)
 
                     ##################
@@ -136,7 +136,7 @@ def display_data_scenes(p_scene, p_bits, p_shifted):
                     ##################
 
                     # modify data depending mode
-                    data = image_processing.normalize_arr(data)
+                    data = processing.normalize_arr(data)
                     images_data.append(data)
 
                 zones_images_data.append(images_data)

+ 4 - 4
display_svd_zone_scene.py

@@ -15,7 +15,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import image_processing
+from ipfml import processing
 from ipfml import metrics
 from skimage import color
 
@@ -133,7 +133,7 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 img_path = os.path.join(scene_path, prefix_image_name + str(index) + ".png")
 
                 current_img = Image.open(img_path)
-                img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
                 # getting expected block id
                 block = img_blocks[p_zone]
@@ -153,10 +153,10 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                         min_val = float(f.readline())
                         max_val = float(f.readline())
 
-                    data = image_processing.normalize_arr_with_range(data, min_val, max_val)
+                    data = processing.normalize_arr_with_range(data, min_val, max_val)
 
                 if p_mode == 'svdn':
-                    data = image_processing.normalize_arr(data)
+                    data = processing.normalize_arr(data)
 
                 zones_images_data.append(data)
 

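The same three-way normalization dispatch ('svd' / 'svdn' / 'svdne') recurs across the display and generation scripts in this commit; a condensed sketch, with the min/max file layout taken from the hunks above:

```python
# Condensed sketch of the 'svd' / 'svdn' / 'svdne' dispatch used across
# these scripts; the min/max file holds two lines (min, then max).
from ipfml import processing

def normalize_for_mode(data, mode, min_max_path=None):
    if mode == 'svdne':
        # normalize against a min/max learned over the dataset (or a custom interval)
        with open(min_max_path, 'r') as f:
            min_val = float(f.readline())
            max_val = float(f.readline())
        return processing.normalize_arr_with_range(data, min_val, max_val)
    if mode == 'svdn':
        # normalize the vector against its own min and max only
        return processing.normalize_arr(data)
    return data  # 'svd': raw singular values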
+ 1 - 1
generateAndTrain_maxwell.sh

@@ -55,7 +55,7 @@ for counter in {0..4}; do
                     echo "${MODEL_NAME} results already generated..."
                 else
                     python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
-                    python models/${model}_train.py --data ${FILENAME} --output ${MODEL_NAME}
+                    python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                     #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2'
                     python save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}

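Training now goes through `train_model.py --choice <model>` instead of per-model scripts. The new `modules/models.py` is not shown in this excerpt, so the following is only a plausible sketch of such a dispatcher; the scikit-learn estimators are stand-ins, not the repository's actual implementations:

```python
# Hypothetical sketch of a --choice dispatcher; the real models live in the
# new modules/models.py, which is not shown in this excerpt. The estimators
# below are scikit-learn stand-ins, not the repository's implementations.
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression

def get_model(choice):
    if choice == 'svm_model':
        return SVC()
    if choice == 'ensemble_model':
        return VotingClassifier(estimators=[('svm', SVC(probability=True)),
                                            ('rf', RandomForestClassifier())])
    if choice == 'ensemble_model_v2':
        return VotingClassifier(estimators=[('svm', SVC(probability=True)),
                                            ('lr', LogisticRegression()),
                                            ('rf', RandomForestClassifier())])
    raise ValueError('unknown model choice: %s' % choice)
```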
+ 74 - 0
generateAndTrain_maxwell_custom.sh

@@ -0,0 +1,74 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of metric information"
+    exit 1
+fi
+
+result_filename="models_info/models_comparisons.csv"
+VECTOR_SIZE=200
+size=$1
+metric=$2
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+half=$(($size/2))
+start=-$half
+for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    if [ "$start" -lt "0" ]; then
+        start=$((0))
+        end=$(($size))
+    fi
+
+    for nb_zones in {4,6,8,10,12,14}; do
+
+        echo $start $end
+
+        for mode in {"svd","svdn","svdne"}; do
+            for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
+
+                echo $FILENAME
+
+                # only compute if necessary (perhaps the server will fail... just in case)
+                if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+                    echo "${MODEL_NAME} results already generated..."
+                else
+                    python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                    #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+                fi
+            done
+        done
+    done
+
+    if [ "$counter" -eq "0" ]; then
+        start=$(($start+50-$half))
+    else
+        start=$(($start+50))
+    fi
+
+done

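The start/end arithmetic above slides a window of `size` singular values across the 200-value vector in steps of 50, clamping at both ends. A Python restatement of that arithmetic (the traced values are my reading of the script):

```python
# Python restatement of the interval window arithmetic in the loop above.
VECTOR_SIZE = 200

def windows(size, n=5):
    half = size // 2
    start = -half
    result = []
    for counter in range(n):
        end = start + size
        if end > VECTOR_SIZE:                  # clamp on the right
            start, end = VECTOR_SIZE - size, VECTOR_SIZE
        if start < 0:                          # clamp on the left
            start, end = 0, size
        result.append((start, end))
        start += 50 - half if counter == 0 else 50
    return result

print(windows(100))  # [(0, 100), (0, 100), (50, 150), (100, 200), (100, 200)]
# duplicate windows are skipped at run time by the grep on models_comparisons.csv
```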
+ 4 - 4
generate_all_data.py

@@ -15,7 +15,7 @@ import json
 
 from modules.utils.data_type import get_svd_data
 from PIL import Image
-from ipfml import image_processing
+from ipfml import processing
 from ipfml import metrics
 from skimage import color
 
@@ -111,7 +111,7 @@ def generate_data_svd(data_type, mode):
                 img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
 
                 current_img = Image.open(img_path)
-                img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
                 for id_block, block in enumerate(img_blocks):
 
@@ -133,10 +133,10 @@ def generate_data_svd(data_type, mode):
                             min_val = float(f.readline())
                             max_val = float(f.readline())
 
-                        data = image_processing.normalize_arr_with_range(data, min_val, max_val)
+                        data = processing.normalize_arr_with_range(data, min_val, max_val)
 
                     if mode == 'svdn':
-                        data = image_processing.normalize_arr(data)
+                        data = processing.normalize_arr(data)
 
                     # save min and max found from dataset in order to normalize data using whole data known
                     if mode == 'svd':

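Like the display scripts, generate_all_data.py cuts each scene image into fixed 200x200 zones before computing descriptors. A minimal sketch of that zoning step (the image path is a placeholder):

```python
# Sketch of the zoning step shared by these scripts: cut a scene image
# into the fixed 200x200 blocks, one per "zoneXX" folder.
from PIL import Image
from ipfml import processing

current_img = Image.open('fichiersSVD_light/SCENE/image_00050.png')  # placeholder path
img_blocks = processing.divide_in_blocks(current_img, (200, 200))

block = img_blocks[3]            # pick a zone by its index
print(len(img_blocks), block.size)
```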
+ 3 - 3
generate_data_model.py

@@ -83,7 +83,7 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
             zones_folder.append("zone"+index_str)
 
         for id_zone, zone_folder in enumerate(zones_folder):
-            zone_path = os.path.join(scene_path, zone_folder) 
+            zone_path = os.path.join(scene_path, zone_folder)
             data_filename = _metric + "_" + _choice + generic_output_file_svd
             data_file_path = os.path.join(zone_path, data_filename)
 
@@ -105,7 +105,7 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
                 line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
 
                 percent = counter / num_lines
-                
+
                 if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
                     train_file.write(line)
                 else:
@@ -172,4 +172,4 @@ def main():
     generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_sep, p_rowindex)
 
 if __name__== "__main__":
-    main()
+    main()

+ 277 - 0
generate_data_model_r.py

@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename         = cfg.config_filename
+zone_folder             = cfg.zone_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list             = cfg.scenes_names
+scenes_indexes          = cfg.scenes_indices
+choices                 = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+zones_indices           = cfg.zones_indices
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
+def construct_new_line(path_seuil, interval, line, norm, sep, index):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    metrics = line_data[begin+1:end+1]
+
+    metrics = [float(m) for m in metrics]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+
+    with open(path_seuil, "r") as seuil_file:
+        seuil_learned = int(seuil_file.readline().strip())
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for idx, val in enumerate(metrics):
+        if index:
+            line += " " + str(idx + 1)
+        line += sep
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_filename, _interval, _choice, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            # shuffle list of zones (=> randomly choose zones)
+            random.shuffle(zones_folder)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                counter = 0
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+                    counter += 1
+
+
+def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _zones = zones_indices, _percent = 1, _norm = False, _sep=':', _index=True):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                num_lines = len(lines)
+
+                lines_indexes = np.arange(num_lines)
+                random.shuffle(lines_indexes)
+
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                counter = 0
+                # check if user select current scene and zone to be part of training data set
+                for index in lines_indexes:
+                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
+
+                    percent = counter / num_lines
+
+                    if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
+                        train_file.write(line)
+                    else:
+                        test_file.write(line)
+
+                    counter += 1
+
+                f.close()
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    p_custom = False
+
+    if len(sys.argv) <= 1:
+        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "sep=", "rowindex=", "custom="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+
+            sys.exit()
+        elif o in ("-o", "--output"):
+            p_filename = a
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+        elif o in ("-k", "--kind"):
+            p_kind = a
+        elif o in ("-m", "--metric"):
+            p_metric = a
+        elif o in ("-s", "--scenes"):
+            p_scenes = a.split(',')
+        elif o in ("-z", "--zones"):
+            if ',' in a:
+                p_zones = list(map(int, a.split(',')))
+            else:
+                p_zones = [a.strip()]
+        elif o in ("-p", "--percent"):
+            p_percent = float(a)
+        elif o in ("-s", "--sep"):
+            p_sep = a
+        elif o in ("-r", "--rowindex"):
+            if int(a) == 1:
+                p_rowindex = True
+            else:
+                p_rowindex = False
+        elif o in ("-c", "--custom"):
+            p_custom = a
+        else:
+            assert False, "unhandled option"
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indexes.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_custom, p_sep, p_rowindex)
+
+if __name__== "__main__":
+    main()

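`construct_new_line` in the new file above emits one labelled sample per line; with `--rowindex 1` and `--sep :` the result is a libsvm-style row. A condensed restatement of just the formatting, with worked examples:

```python
# Condensed restatement of construct_new_line's formatting (file I/O removed).
def format_line(label, values, sep=':', index=True):
    line = str(label)
    for idx, val in enumerate(values):
        if index:
            line += " " + str(idx + 1)   # 1-based feature index (--rowindex 1)
        line += sep + str(val)
    return line + '\n'

# the label is 1 when the learned threshold (seuil) is above the image's one
print(format_line(1, [0.12, 0.34]))                        # "1 1:0.12 2:0.34"
print(format_line(0, [0.12, 0.34], sep=';', index=False))  # "0;0.12;0.34"
```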
+ 137 - 44
generate_data_model_random.py

@@ -14,8 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import image_processing
-from ipfml import metrics
+from ipfml import processing, metrics
 
 from modules.utils import config as cfg
 
@@ -33,16 +32,28 @@ zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
 
 metric_choices          = cfg.metric_choices_labels
-
 output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval = sys.maxsize
+max_value_interval = 0
 
-def construct_new_line(path_seuil, interval, line, sep, index):
+def construct_new_line(path_seuil, interval, line, norm, sep, index):
     begin, end = interval
 
     line_data = line.split(';')
     seuil = line_data[0]
     metrics = line_data[begin+1:end+1]
 
+    metrics = [float(m) for m in metrics]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
 
@@ -55,12 +66,71 @@ def construct_new_line(path_seuil, interval, line, sep, index):
         if index:
             line += " " + str(idx + 1)
         line += sep
-        line += val
+        line += str(val)
     line += '\n'
 
     return line
 
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes, _nb_zones = 4, _percent = 1, _sep=':', _index=True):
+def get_min_max_value_interval(_filename, _interval, _choice, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            # shuffle list of zones (=> randomly choose zones)
+            random.shuffle(zones_folder)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                counter = 0
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+                    counter += 1
+
+
+def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -81,50 +151,54 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     scenes = [s for s in scenes if min_max_filename not in s]
 
     for id_scene, folder_scene in enumerate(scenes):
-        scene_path = os.path.join(path, folder_scene)
 
-        zones_folder = []
-        # create zones list
-        for index in zones:
-            index_str = str(index)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
-            zones_folder.append("zone"+index_str)
+        # only take care of maxwell scenes
+        if folder_scene in scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
 
-        # shuffle list of zones (=> randomly choose zones)
-        random.shuffle(zones_folder)
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
 
-        for id_zone, zone_folder in enumerate(zones_folder):
-            zone_path = os.path.join(scene_path, zone_folder)
-            data_filename = _metric + "_" + _choice + generic_output_file_svd
-            data_file_path = os.path.join(zone_path, data_filename)
+            # shuffle list of zones (=> randomly choose zones)
+            random.shuffle(zones_folder)
 
-            # getting number of line and read randomly lines
-            f = open(data_file_path)
-            lines = f.readlines()
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
 
-            num_lines = len(lines)
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
 
-            lines_indexes = np.arange(num_lines)
-            random.shuffle(lines_indexes)
+                num_lines = len(lines)
 
-            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+                lines_indexes = np.arange(num_lines)
+                random.shuffle(lines_indexes)
 
-            counter = 0
-            # check if user select current scene and zone to be part of training data set
-            for index in lines_indexes:
-                line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
 
-                percent = counter / num_lines
+                counter = 0
+                # check if user select current scene and zone to be part of training data set
+                for index in lines_indexes:
+                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
 
-                if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                    train_file.write(line)
-                else:
-                    test_file.write(line)
+                    percent = counter / num_lines
 
-                counter += 1
+                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                        train_file.write(line)
+                    else:
+                        test_file.write(line)
 
-            f.close()
+                    counter += 1
+
+                f.close()
 
     train_file.close()
     test_file.close()
@@ -132,19 +206,21 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
 
 def main():
 
+    p_custom = False
+
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
+            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -167,6 +243,8 @@ def main():
                 p_rowindex = True
             else:
                 p_rowindex = False
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
@@ -175,10 +253,25 @@ def main():
 
     for scene_id in p_scenes:
         index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes[index])
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_sep, p_rowindex)
+    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
 
 if __name__== "__main__":
     main()

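In the random variants, the train/test split hinges on shuffling each scene's zone list and sending the first `nb_zones` zones to the training set. A condensed restatement (16 zones per scene is assumed here purely for illustration):

```python
# Condensed restatement of the random zone selection above
# (16 zones per scene assumed purely for illustration).
import random

zones_folder = ["zone%02d" % i for i in range(16)]
random.shuffle(zones_folder)          # randomly choose which zones go to train

nb_zones = 4
for id_zone, zone_folder in enumerate(zones_folder):
    destination = 'train' if id_zone < nb_zones else 'test'
    print(zone_folder, '->', destination)
```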
+ 30 - 15
generate_data_model_random_maxwell.py

@@ -14,7 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import image_processing, metrics
+from ipfml import processing, metrics
 
 from modules.utils import config as cfg
 
@@ -33,6 +33,10 @@ seuil_expe_filename     = cfg.seuil_expe_filename
 
 metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
 
 min_value_interval = sys.maxsize
 max_value_interval = 0
@@ -46,8 +50,9 @@ def construct_new_line(path_seuil, interval, line, norm, sep, index):
 
     metrics = [float(m) for m in metrics]
 
+    # TODO : check if it's always necessary to do that (loss of information for svd)
     if norm:
-        metrics = image_processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
 
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
@@ -66,7 +71,7 @@ def construct_new_line(path_seuil, interval, line, norm, sep, index):
 
     return line
 
-def get_min_max_value_interval(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1):
+def get_min_max_value_interval(_filename, _interval, _choice, _metric):
 
     global min_value_interval, max_value_interval
 
@@ -201,19 +206,21 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
 
 def main():
 
+    p_custom = False
+
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --norm 1 --sep : --rowindex 1')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "norm=", "sep=", "rowindex="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --norm 1 --sep : --rowindex 1')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --norm 1 --sep : --rowindex 1')
+            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -227,11 +234,6 @@ def main():
             p_scenes = a.split(',')
         elif o in ("-n", "--nb_zones"):
             p_nb_zones = int(a)
-        elif o in ("-n", "--norm"):
-            if int(a) == 1:
-                p_norm = True
-            else:
-                p_norm = False
         elif o in ("-p", "--percent"):
             p_percent = float(a)
         elif o in ("-s", "--sep"):
@@ -241,6 +243,8 @@ def main():
                 p_rowindex = True
             else:
                 p_rowindex = False
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
@@ -252,11 +256,22 @@ def main():
         scenes_selected.append(scenes_list[index])
 
     # find min max value if necessary to renormalize data
-    if p_norm:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent)
+    if p_custom:
+        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_norm, p_sep, p_rowindex)
+    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
 
 if __name__== "__main__":
     main()

+ 0 - 77
metrics_predictions/predict_noisy_image_svd_lab.py

@@ -1,77 +0,0 @@
-from sklearn.externals import joblib
-
-import numpy as np
-
-from ipfml import image_processing
-from PIL import Image
-
-import sys, os, getopt
-
-min_max_file_path = 'fichiersSVD_light/lab_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file) 
-
-    # load image
-    img = Image.open(p_img_file)
-    LAB_L = image_processing.get_LAB_L_SVD_s(img)
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-        
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(LAB_L, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(LAB_L)
-    else:
-        l_values = LAB_L
-
-    
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

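This deletion (and the ones that follow) consolidates the per-metric prediction scripts into the new `predict_noisy_image_svd.py`, with metric computation centralized in `get_svd_data` of `modules/utils/data_type.py` per the README changes. A plausible sketch of the consolidated flow; the exact `get_svd_data` signature is an assumption:

```python
# Plausible sketch of the consolidated predict_noisy_image_svd.py flow;
# the exact signature of get_svd_data is assumed, not shown in this excerpt.
from sklearn.externals import joblib
from PIL import Image
from ipfml import processing

from modules.utils.data_type import get_svd_data

def predict(image_path, model_path, metric, mode, interval, min_max_path=None):
    model = joblib.load(model_path)
    img = Image.open(image_path)

    data = get_svd_data(metric, img)          # assumed signature: (metric, image)

    if mode == 'svdne':                       # custom or dataset-wide min/max
        with open(min_max_path, 'r') as f:
            min_val, max_val = float(f.readline()), float(f.readline())
        data = processing.normalize_arr_with_range(data, min_val, max_val)
    elif mode == 'svdn':
        data = processing.normalize_arr(data)

    begin, end = interval
    return model.predict([data[begin:end]])[0]   # 1 = noisy, 0 = not noisy
```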
+ 0 - 78
metrics_predictions/predict_noisy_image_svd_low_bits_2.py

@@ -1,78 +0,0 @@
-from sklearn.externals import joblib
-
-import numpy as np
-
-from ipfml import image_processing
-from ipfml import metrics
-from PIL import Image
-
-import sys, os, getopt
-
-min_max_file_path = 'fichiersSVD_light/low_bits_2_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file)
-
-    # load image
-    img = Image.open(p_img_file)
-    low_bits_2_values = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_low_bits(img, 3))
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(low_bits_2_values, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(low_bits_2_values)
-    else:
-        l_values = low_bits_2_values
-
-
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 78
metrics_predictions/predict_noisy_image_svd_low_bits_3.py

@@ -1,78 +0,0 @@
-from sklearn.externals import joblib
-
-import numpy as np
-
-from ipfml import image_processing
-from ipfml import metrics
-from PIL import Image
-
-import sys, os, getopt
-
-min_max_file_path = 'fichiersSVD_light/low_bits_3_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file)
-
-    # load image
-    img = Image.open(p_img_file)
-    low_bits_3_values = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_low_bits(img, 7))
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(low_bits_3_values, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(low_bits_3_values)
-    else:
-        l_values = low_bits_3_values
-
-
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 78
metrics_predictions/predict_noisy_image_svd_low_bits_4.py

@@ -1,78 +0,0 @@
-from sklearn.externals import joblib
-
-from ipfml import image_processing
-from ipfml import metrics
-
-from PIL import Image
-
-import sys, os, getopt
-import numpy as np
-
-min_max_file_path = 'fichiersSVD_light/low_bits_4_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file =  os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file)
-
-    # load image
-    img = Image.open(p_img_file)
-    low_bits_4_values = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_low_bits(img))
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(low_bits_4_values, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(low_bits_4_values)
-    else:
-        l_values = low_bits_4_values
-
-
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 78
metrics_predictions/predict_noisy_image_svd_low_bits_4_shifted_2.py

@@ -1,78 +0,0 @@
-from sklearn.externals import joblib
-
-import numpy as np
-
-from ipfml import image_processing
-from ipfml import metrics
-from PIL import Image
-
-import sys, os, getopt
-
-min_max_file_path = 'fichiersSVD_light/low_bits_4_shifted_2_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file)
-
-    # load image
-    img = Image.open(p_img_file)
-    low_bits_4_shifted_2_values = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_bits(block_used, (3, 6)))
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(low_bits_4_shifted_2_values, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(low_bits_4_shifted_2_values)
-    else:
-        l_values = low_bits_4_shifted_2_values
-
-
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 78
metrics_predictions/predict_noisy_image_svd_low_bits_5.py

@@ -1,78 +0,0 @@
-from sklearn.externals import joblib
-
-from ipfml import image_processing
-from ipfml import metrics
-
-from PIL import Image
-
-import sys, os, getopt
-import numpy as np
-
-min_max_file_path = 'fichiersSVD_light/low_bits_5_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file =  os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file)
-
-    # load image
-    img = Image.open(p_img_file)
-    low_bits_5_values = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_low_bits(img, 31))
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(low_bits_5_values, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(low_bits_5_values)
-    else:
-        l_values = low_bits_6_values
-
-
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 78
metrics_predictions/predict_noisy_image_svd_low_bits_6.py

@@ -1,78 +0,0 @@
-from sklearn.externals import joblib
-
-from ipfml import image_processing
-from ipfml import metrics
-
-from PIL import Image
-
-import sys, os, getopt
-import numpy as np
-
-min_max_file_path = 'fichiersSVD_light/low_bits_6_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file =  os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file)
-
-    # load image
-    img = Image.open(p_img_file)
-    low_bits_6_values = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_low_bits(img, 63))
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(low_bits_6_values, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(low_bits_6_values)
-    else:
-        l_values = low_bits_6_values
-
-
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 86
metrics_predictions/predict_noisy_image_svd_mscn.py

@@ -1,86 +0,0 @@
-from sklearn.externals import joblib
-
-import numpy as np
-
-from ipfml import image_processing
-from ipfml import metrics
-from PIL import Image
-from skimage import color
-
-import sys, os, getopt
-
-min_max_file_path = 'fichiersSVD_light/mscn_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_mscn.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_mscn.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_nopredict_noisy_image_svd_mscnisy_image.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file) 
-
-    # load image
-    img = Image.open(p_img_file)
-    
-    img_gray = np.array(color.rgb2gray(np.asarray(img))*255, 'uint8')
-    img_mscn = image_processing.calculate_mscn_coefficients(img_gray, 7)
-    img_mscn_norm = image_processing.normalize_2D_arr(img_mscn)
-    img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
-
-    SVD_MSCN = metrics.get_SVD_s(img_mscn_gray)
-
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-        
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(SVD_MSCN, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(SVD_MSCN)
-    else:
-        l_values = SVD_MSCN
-
-    
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 88
metrics_predictions/predict_noisy_image_svd_mscn_revisited.py

@@ -1,88 +0,0 @@
-from sklearn.externals import joblib
-
-import numpy as np
-
-from ipfml import image_processing
-from ipfml import metrics
-from PIL import Image
-
-import sys, os, getopt
-
-min_max_file_path = 'fichiersSVD_light/mscn_revisited_min_max_values'
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd_mscn.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:o", ["help=", "image=", "interval=", "model=", "mode="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python predict_noisy_image_svd_mscn.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_nopredict_noisy_image_svd_mscnisy_image.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --mode ["svdn", "svdne"]')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.join(os.path.dirname(__file__),'../'), a)
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
-                assert False, "Mode not recognized"
-        else:
-            assert False, "unhandled option"
-
-    # load of model file
-    model = joblib.load(p_model_file) 
-
-    # load image
-    img = Image.open(p_img_file)
-    img_mscn = image_processing.rgb_to_mscn(img)
-
-    # save tmp as img
-    img_output = Image.fromarray(img_mscn.astype('uint8'), 'L')
-    mscn_file_path = '/tmp/mscn_revisited_img.png'
-    img_output.save(mscn_file_path)
-    img_block = Image.open(mscn_file_path)
-
-    # extract from temp image
-    SVD_MSCN_REVISITED = metrics.get_SVD_s(img_block)
-
-
-    # check mode to normalize data
-    if p_mode == 'svdne':
-        
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
-
-        l_values = image_processing.normalize_arr_with_range(SVD_MSCN_REVISITED, min, max)
-
-    elif p_mode == 'svdn':
-        l_values = image_processing.normalize_arr(SVD_MSCN_REVISITED)
-    else:
-        l_values = SVD_MSCN_REVISITED
-
-    
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
-
-    # get prediction of model
-    prediction = model.predict([test_data])[0]
-
-    print(prediction)
-
-
-if __name__== "__main__":
-    main()

+ 0 - 167
models/ensemble_model_train.py

@@ -1,167 +0,0 @@
-from sklearn.model_selection import train_test_split
-from sklearn.model_selection import GridSearchCV
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier, VotingClassifier
-
-import sklearn.svm as svm
-from sklearn.utils import shuffle
-from sklearn.externals import joblib
-from sklearn.metrics import accuracy_score, f1_score
-from sklearn.model_selection import cross_val_score
-
-import numpy as np
-import pandas as pd
-import sys, os, getopt
-
-saved_models_folder = 'saved_models'
-current_dirpath = os.getcwd()
-output_model_folder = os.path.join(current_dirpath, saved_models_folder)
-
-def get_best_model(X_train, y_train):
-
-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
-
-    svc = svm.SVC(probability=True)
-    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=10)
-
-    clf.fit(X_train, y_train)
-
-    model = clf.best_estimator_
-
-    return model
-
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python ensemble_model_train.py --data xxxx --output xxxx')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hd:o", ["help=", "data=", "output="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python ensemble_model_train.py --data xxxx --output xxxx')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python ensemble_model_train.py --data xxxx --output xxxx')
-            sys.exit()
-        elif o in ("-d", "--data"):
-            p_data_file = a
-        elif o in ("-o", "--output"):
-            p_output = a
-        else:
-            assert False, "unhandled option"
-
-    if not os.path.exists(output_model_folder):
-        os.makedirs(output_model_folder)
-
-    ########################
-    # 1. Get and prepare data
-    ########################
-    dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
-    dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
-
-    # default first shuffle of data
-    dataset_train = shuffle(dataset_train)
-    dataset_test = shuffle(dataset_test)
-
-    # get dataset with equal number of classes occurences
-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
-    nb_noisy_train = len(noisy_df_train.index)
-
-    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
-    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
-    nb_noisy_test = len(noisy_df_test.index)
-
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
-
-    # shuffle data another time
-    final_df_train = shuffle(final_df_train)
-    final_df_test = shuffle(final_df_test)
-
-    final_df_train_size = len(final_df_train.index)
-    final_df_test_size = len(final_df_test.index)
-
-    # use of the whole data set for training
-    x_dataset_train = final_df_train.ix[:,1:]
-    x_dataset_test = final_df_test.ix[:,1:]
-
-    y_dataset_train = final_df_train.ix[:,0]
-    y_dataset_test = final_df_test.ix[:,0]
-
-    #######################
-    # 2. Construction of the model : Ensemble model structure
-    #######################
-
-    svm_model = get_best_model(x_dataset_train, y_dataset_train)
-
-    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
-    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
-
-    ensemble_model = VotingClassifier(estimators=[
-       ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)], voting='soft', weights=[1,1,1])
-
-    #######################
-    # 3. Fit model : use of cross validation to fit model
-    #######################
-    print("-------------------------------------------")
-    print("Train dataset size: ", final_df_train_size)
-    ensemble_model.fit(x_dataset_train, y_dataset_train)
-    val_scores = cross_val_score(ensemble_model, x_dataset_train, y_dataset_train, cv=5)
-    print("Accuracy: %0.2f (+/- %0.2f)" % (val_scores.mean(), val_scores.std() * 2))
-
-    ######################
-    # 4. Test : Validation and test dataset from .test dataset
-    ######################
-
-    # we need to specify validation size to 20% of whole dataset
-    val_set_size = int(final_df_train_size/3)
-    test_set_size = val_set_size
-
-    total_validation_size = val_set_size + test_set_size
-
-    if final_df_test_size > total_validation_size:
-        x_dataset_test = x_dataset_test[0:total_validation_size]
-        y_dataset_test = y_dataset_test[0:total_validation_size]
-
-    X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
-
-    y_test_model = ensemble_model.predict(X_test)
-    y_val_model = ensemble_model.predict(X_val)
-
-    val_accuracy = accuracy_score(y_val, y_val_model)
-    test_accuracy = accuracy_score(y_test, y_test_model)
-
-    val_f1 = f1_score(y_val, y_val_model)
-    test_f1 = f1_score(y_test, y_test_model)
-
-
-    ###################
-    # 5. Output : Print and write all information in csv
-    ###################
-
-    print("Validation dataset size ", val_set_size)
-    print("Validation: ", val_accuracy)
-    print("Validation F1: ", val_f1)
-    print("Test dataset size ", test_set_size)
-    print("Test: ", val_accuracy)
-    print("Test F1: ", test_f1)
-
-
-    ##################
-    # 6. Save model : create path if not exists
-    ##################
-
-    if not os.path.exists(saved_models_folder):
-        os.makedirs(saved_models_folder)
-
-    joblib.dump(ensemble_model, output_model_folder + '/' + p_output + '.joblib')
-
-if __name__== "__main__":
-    main()

+ 0 - 174
models/ensemble_model_v2_train.py

@@ -1,174 +0,0 @@
-from sklearn.model_selection import train_test_split
-from sklearn.model_selection import GridSearchCV
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier, VotingClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-
-import sklearn.svm as svm
-from sklearn.utils import shuffle
-from sklearn.externals import joblib
-from sklearn.metrics import accuracy_score, f1_score
-
-from sklearn.model_selection import cross_val_score
-
-import numpy as np
-import pandas as pd
-import sys, os, getopt
-
-saved_models_folder = 'saved_models'
-current_dirpath = os.getcwd()
-output_model_folder = os.path.join(current_dirpath, saved_models_folder)
-
-def get_best_model(X_train, y_train):
-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
-
-    svc = svm.SVC(probability=True)
-    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=10)
-
-    clf.fit(X_train, y_train)
-
-    model = clf.best_estimator_
-
-    return model
-
-
-def main():
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python ensemble_model_train_v2.py --data xxxx --output xxxx')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hd:o", ["help=", "data=", "output="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python ensemble_model_train_v2.py --data xxxx --output xxxx')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python ensemble_model_train_v2.py --data xxxx --output xxxx')
-            sys.exit()
-        elif o in ("-d", "--data"):
-            p_data_file = a
-        elif o in ("-o", "--output"):
-            p_output = a
-        else:
-            assert False, "unhandled option"
-
-    if not os.path.exists(output_model_folder):
-        os.makedirs(output_model_folder)
-
-    # 1. Get and prepare data
-    dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
-    dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
-
-    # default first shuffle of data
-    dataset_train = shuffle(dataset_train)
-    dataset_test = shuffle(dataset_test)
-
-    # get dataset with equal number of classes occurences
-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
-    nb_noisy_train = len(noisy_df_train.index)
-
-    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
-    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
-    nb_noisy_test = len(noisy_df_test.index)
-
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
-
-    # shuffle data another time
-    final_df_train = shuffle(final_df_train)
-    final_df_test = shuffle(final_df_test)
-
-    final_df_train_size = len(final_df_train.index)
-    final_df_test_size = len(final_df_test.index)
-
-    # use of the whole data set for training
-    x_dataset_train = final_df_train.ix[:,1:]
-    x_dataset_test = final_df_test.ix[:,1:]
-
-    y_dataset_train = final_df_train.ix[:,0]
-    y_dataset_test = final_df_test.ix[:,0]
-
-
-    #######################
-    # 2. Construction of the model : Ensemble model structure
-    #######################
-
-    svm_model = get_best_model(x_dataset_train, y_dataset_train)
-    knc_model = KNeighborsClassifier(n_neighbors=2)
-    gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
-    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
-    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
-
-    ensemble_model = VotingClassifier(estimators=[
-       ('lr', lr_model),
-       ('knc', knc_model),
-       ('gbc', gbc_model),
-       ('svm', svm_model),
-       ('rf', rf_model)],
-       voting='soft', weights=[1, 1, 1, 1, 1])
-
-
-    #######################
-    # 3. Fit model : use of cross validation to fit model
-    #######################
-    print("-------------------------------------------")
-    print("Train dataset size: ", final_df_train_size)
-    ensemble_model.fit(x_dataset_train, y_dataset_train)
-    val_scores = cross_val_score(ensemble_model, x_dataset_train, y_dataset_train, cv=5)
-    print("Accuracy: %0.2f (+/- %0.2f)" % (val_scores.mean(), val_scores.std() * 2))
-
-    ######################
-    # 4. Test : Validation and test dataset from .test dataset
-    ######################
-
-    # we need to specify validation size to 20% of whole dataset
-    val_set_size = int(final_df_train_size/3)
-    test_set_size = val_set_size
-
-    total_validation_size = val_set_size + test_set_size
-
-    if final_df_test_size > total_validation_size:
-        x_dataset_test = x_dataset_test[0:total_validation_size]
-        y_dataset_test = y_dataset_test[0:total_validation_size]
-
-    X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
-
-    y_test_model = ensemble_model.predict(X_test)
-    y_val_model = ensemble_model.predict(X_val)
-
-    val_accuracy = accuracy_score(y_val, y_val_model)
-    test_accuracy = accuracy_score(y_test, y_test_model)
-
-    val_f1 = f1_score(y_val, y_val_model)
-    test_f1 = f1_score(y_test, y_test_model)
-
-    ###################
-    # 5. Output : Print and write all information in csv
-    ###################
-
-    print("Validation dataset size ", val_set_size)
-    print("Validation: ", val_accuracy)
-    print("Validation F1: ", val_f1)
-    print("Test dataset size ", test_set_size)
-    print("Test: ", val_accuracy)
-    print("Test F1: ", test_f1)
-
-    ##################
-    # 6. Save model : create path if not exists
-    ##################
-
-    # create path if not exists
-    if not os.path.exists(saved_models_folder):
-        os.makedirs(saved_models_folder)
-
-    joblib.dump(ensemble_model, output_model_folder + '/' +  p_output + '.joblib')
-
-if __name__== "__main__":
-    main()

+ 75 - 0
modules/models.py

@@ -0,0 +1,75 @@
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import GradientBoostingClassifier
+import sklearn.svm as svm
+
+
+def _get_best_model(X_train, y_train):
+
+    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
+    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
+    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
+
+    svc = svm.SVC(probability=True)
+    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=10)
+
+    clf.fit(X_train, y_train)
+
+    model = clf.best_estimator_
+
+    return model
+
+def svm_model(X_train, y_train):
+
+    return _get_best_model(X_train, y_train)
+
+
+def ensemble_model(X_train, y_train):
+
+    svm_model = _get_best_model(X_train, y_train)
+
+    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
+    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
+
+    ensemble_model = VotingClassifier(estimators=[
+       ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)], voting='soft', weights=[1,1,1])
+
+    ensemble_model.fit(X_train, y_train)
+
+    return ensemble_model
+
+
+def ensemble_model_v2(X_train, y_train):
+
+    svm_model = _get_best_model(X_train, y_train)
+    knc_model = KNeighborsClassifier(n_neighbors=2)
+    gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
+    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
+    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
+
+    ensemble_model = VotingClassifier(estimators=[
+       ('lr', lr_model),
+       ('knc', knc_model),
+       ('gbc', gbc_model),
+       ('svm', svm_model),
+       ('rf', rf_model)],
+       voting='soft', weights=[1, 1, 1, 1, 1])
+
+    ensemble_model.fit(X_train, y_train)
+
+    return ensemble_model
+
+def get_trained_model(choice, X_train, y_train):
+
+    if choice == 'svm_model':
+        return svm_model(X_train, y_train)
+
+    if choice == 'ensemble_model':
+        return ensemble_model(X_train, y_train)
+
+    if choice == 'ensemble_model_v2':
+        return ensemble_model_v2(X_train, y_train)
+
+

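The three standalone training scripts under `models/` are consolidated into this single module, with `get_trained_model` dispatching on a model name and returning an already-fitted classifier. An unrecognized `choice` falls through and returns `None`, so callers should stick to the names listed in `cfg.models_names_list`. A minimal usage sketch (assuming `modules` is importable as a package; the data file name is a placeholder, and the `.train` layout, `;`-separated with the label in column 0, matches the repository's generated files):

```
import pandas as pd
from sklearn.utils import shuffle

from modules import models

# .train files are ';'-separated; column 0 is the noisy/not-noisy label
dataset = shuffle(pd.read_csv('data/example.train', header=None, sep=";"))
y_train = dataset.iloc[:, 0]
X_train = dataset.iloc[:, 1:]

# choice is one of: "svm_model", "ensemble_model", "ensemble_model_v2"
model = models.get_trained_model('ensemble_model', X_train, y_train)
print(model.score(X_train, y_train))
```
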
+ 5 - 4
modules/utils/config.py

@@ -1,19 +1,20 @@
 import numpy as np
 
-config_filename                 = "config"
 zone_folder                     = "zone"
-min_max_filename_extension      = "_min_max_values"
 output_data_folder              = 'data'
 dataset_path                    = 'fichiersSVD_light'
-seuil_expe_filename             = 'seuilExpe'
 threshold_map_folder            = 'threshold_map'
 models_information_folder       = 'models_info'
 saved_models_folder             = 'saved_models'
+min_max_custom_folder           = 'custom_norm'
+
 csv_model_comparisons_filename  = "models_comparisons.csv"
+seuil_expe_filename             = 'seuilExpe'
+min_max_filename_extension      = "_min_max_values"
+config_filename                 = "config"
 
 models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2"]
 
-
 # define all scenes values
 scenes_names                    = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
 scenes_indices                  = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']

+ 11 - 22
modules/utils/data_type.py

@@ -1,4 +1,4 @@
-from ipfml import image_processing, metrics
+from ipfml import processing, metrics
 from PIL import Image
 from skimage import color
 
@@ -13,11 +13,11 @@ def get_svd_data(data_type, block):
 
         block_file_path = '/tmp/lab_img.png'
         block.save(block_file_path)
-        data = image_processing.get_LAB_L_SVD_s(Image.open(block_file_path))
+        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
 
     if data_type == 'mscn_revisited':
 
-        img_mscn_revisited = image_processing.rgb_to_mscn(block)
+        img_mscn_revisited = processing.rgb_to_mscn(block)
 
         # save tmp as img
         img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
@@ -31,8 +31,8 @@ def get_svd_data(data_type, block):
     if data_type == 'mscn':
 
         img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
-        img_mscn = image_processing.calculate_mscn_coefficients(img_gray, 7)
-        img_mscn_norm = image_processing.normalize_2D_arr(img_mscn)
+        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
+        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
 
         img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
 
@@ -40,43 +40,32 @@ def get_svd_data(data_type, block):
 
     if data_type == 'low_bits_6':
 
-        low_bits_6 = image_processing.rgb_to_LAB_L_low_bits(block, 63)
-
-        # extract from temp image
+        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 6)
         data = metrics.get_SVD_s(low_bits_6)
 
     if data_type == 'low_bits_5':
 
-        low_bits_5 = image_processing.rgb_to_LAB_L_low_bits(block, 31)
-
-        # extract from temp image
+        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 5)
         data = metrics.get_SVD_s(low_bits_5)
 
-
     if data_type == 'low_bits_4':
 
-        low_bits_4 = image_processing.rgb_to_LAB_L_low_bits(block)
-
-        # extract from temp image
+        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block, 4)
         data = metrics.get_SVD_s(low_bits_4)
 
     if data_type == 'low_bits_3':
 
-        low_bits_3 = image_processing.rgb_to_LAB_L_low_bits(block, 7)
-
-        # extract from temp image
+        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 3)
         data = metrics.get_SVD_s(low_bits_3)
 
     if data_type == 'low_bits_2':
 
-        low_bits_2 = image_processing.rgb_to_LAB_L_low_bits(block, 3)
-
-        # extract from temp image
+        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 2)
         data = metrics.get_SVD_s(low_bits_2)
 
     if data_type == 'low_bits_4_shifted_2':
 
-        data = metrics.get_SVD_s(image_processing.rgb_to_LAB_L_bits(block, (3, 6)))
+        data = metrics.get_SVD_s(processing.rgb_to_LAB_L_bits(block, (3, 6)))
 
     return data
 

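Beyond the module rename (`image_processing` becomes `processing`), the low-bits helpers are now called with the number of bits to keep (2 to 6) instead of the corresponding bit mask (3, 7, 31, 63). Assuming the new `processing.rgb_to_LAB_L_low_bits` takes a bit count, the two conventions are related by `mask = 2**n - 1`; a small sketch (not the ipfml implementation) on a synthetic block:

```
import numpy as np

def keep_low_bits(lab_l, nb_bits):
    # a bit count n corresponds to the former mask 2**n - 1
    # (6 -> 63, 5 -> 31, 3 -> 7, 2 -> 3)
    mask = (1 << nb_bits) - 1
    return np.asarray(lab_l, dtype='uint8') & mask

block = np.random.randint(0, 256, (200, 200))
assert np.array_equal(keep_low_bits(block, 6),
                      np.asarray(block, dtype='uint8') & 63)
```
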
+ 102 - 0
predict_noisy_image_svd.py

@@ -0,0 +1,102 @@
+from sklearn.externals import joblib
+
+import numpy as np
+
+from ipfml import processing
+from PIL import Image
+
+import sys, os, getopt
+
+from modules.utils import config as cfg
+from modules.utils import data_type as dt
+
+path                  = cfg.dataset_path
+min_max_ext           = cfg.min_max_filename_extension
+metric_choices        = cfg.metric_choices_labels
+normalization_choices = cfg.normalization_choices
+
+custom_min_max_folder = cfg.min_max_custom_folder
+
+def main():
+
+    p_custom = False
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python predict_noisy_image_svd.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:m:o:c", ["help=", "image=", "interval=", "model=", "metric=", "mode=", "custom="])
+    except getopt.GetoptError:
+        # print help information and exit
+        print('python predict_noisy_image_svd.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python predict_noisy_image_svd.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
+            sys.exit()
+        elif o in ("-i", "--image"):
+            p_img_file = os.path.join(os.path.dirname(__file__), a)
+        elif o in ("-t", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+        elif o in ("-m", "--model"):
+            p_model_file = os.path.join(os.path.dirname(__file__), a)
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Unknown metric choice"
+        elif o in ("-o", "--mode"):
+            p_mode = a
+
+            if p_mode not in normalization_choices:
+                assert False, "Mode of normalization not recognized"
+        elif o in ("-m", "--custom"):
+            p_custom = a
+
+        else:
+            assert False, "unhandled option"
+
+    # load of model file
+    model = joblib.load(p_model_file)
+
+    # load image
+    img = Image.open(p_img_file)
+
+    data = dt.get_svd_data(p_metric, img)
+
+    # check mode to normalize data
+    if p_mode == 'svdne':
+
+        # select the min/max file: either a custom one or the metric's default
+        if p_custom:
+            min_max_file_path = custom_min_max_folder + '/' + p_custom
+        else:
+            min_max_file_path = path + '/' + p_metric + min_max_ext
+
+        # need to read min_max_file
+        file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+        with open(file_path, 'r') as f:
+            min = float(f.readline().replace('\n', ''))
+            max = float(f.readline().replace('\n', ''))
+
+        l_values = processing.normalize_arr_with_range(data, min, max)
+
+    elif p_mode == 'svdn':
+        l_values = processing.normalize_arr(data)
+    else:
+        l_values = data
+
+
+    # get interval values
+    begin, end = p_interval
+    test_data = l_values[begin:end]
+
+    # get prediction of model
+    prediction = model.predict([test_data])[0]
+
+    # output expected from others scripts
+    print(prediction)
+
+if __name__== "__main__":
+    main()

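This single script replaces the per-metric `metrics_predictions/predict_noisy_image_svd_*.py` scripts: the metric is now a command-line parameter and feature extraction is delegated to `modules.utils.data_type.get_svd_data`. The three normalization modes reduce to the following (a sketch, assuming ipfml's `normalize_arr` rescales by the vector's own range and `normalize_arr_with_range` by a stored global range):

```
import numpy as np

def normalize(values, mode, global_min=None, global_max=None):
    values = np.asarray(values, dtype=float)
    if mode == 'svdne':
        # rescale with the min/max learned over the whole dataset
        return (values - global_min) / (global_max - global_min)
    if mode == 'svdn':
        # rescale with the current vector's own min/max
        return (values - values.min()) / (values.max() - values.min())
    return values  # 'svd': raw singular values
```
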
+ 19 - 10
predict_seuil_expe.py

@@ -2,7 +2,7 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import image_processing
+from ipfml import processing
 from PIL import Image
 
 import sys, os, getopt
@@ -27,19 +27,21 @@ current_dirpath = os.getcwd()
 
 def main():
 
+    p_custom = False
+
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python predict_seuil_expe.py --interval "0,20" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+        print('python predict_seuil_expe.py --interval "0,20" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ht:m:o:l", ["help=", "interval=", "model=", "mode=", "metric=" "limit_detection="])
+        opts, args = getopt.getopt(sys.argv[1:], "ht:m:o:l:c", ["help=", "interval=", "model=", "mode=", "metric=" "limit_detection=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python predict_seuil_expe.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+        print('python predict_seuil_expe.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python predict_seuil_expe.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+            print('python predict_seuil_expe.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
             sys.exit()
         elif o in ("-t", "--interval"):
             p_interval = a
@@ -55,13 +57,14 @@ def main():
             p_metric = a
         elif o in ("-l", "--limit_detection"):
             p_limit = int(a)
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
     scenes = os.listdir(scenes_path)
 
-    if min_max_filename in scenes:
-        scenes.remove(min_max_filename)
+    scenes = [s for s in scenes if min_max_filename not in s]
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
@@ -118,7 +121,7 @@ def main():
             img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
 
             current_img = Image.open(img_path)
-            img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+            img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
 
             check_all_done = all(d == True for d in threshold_expes_detected)
@@ -131,10 +134,16 @@ def main():
                     tmp_file_path = tmp_filename.replace('__model__',  p_model_file.split('/')[-1].replace('.joblib', '_'))
                     block.save(tmp_file_path)
 
-                    python_cmd = "python metrics_predictions/predict_noisy_image_svd_" + p_metric + ".py --image " + tmp_file_path + \
+                    python_cmd = "python predict_noisy_image_svd.py --image " + tmp_file_path + \
                                     " --interval '" + p_interval + \
                                     "' --model " + p_model_file  + \
-                                    " --mode " + p_mode
+                                    " --mode " + p_mode + \
+                                    " --metric " + p_metric
+
+                    # specify use of custom file for min max normalization
+                    if p_custom:
+                        python_cmd = python_cmd + ' --custom ' + p_custom
+
 
                     ## call command ##
                     p = subprocess.Popen(python_cmd, stdout=subprocess.PIPE, shell=True)

+ 19 - 11
predict_seuil_expe_maxwell.py

@@ -2,7 +2,7 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import image_processing
+from ipfml import processing
 from PIL import Image
 
 import sys, os, getopt
@@ -27,22 +27,24 @@ tmp_filename              = '/tmp/__model__img_to_predict.png'
 
 current_dirpath = os.getcwd()
 
-
 def main():
 
+    # by default, no custom min/max file is used
+    p_custom = False
+
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python predict_seuil_expe_maxwell.py --interval "0,20" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+        print('python predict_seuil_expe_maxwell.py --interval "0,20" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ht:m:o:l", ["help=", "interval=", "model=", "mode=", "metric=", "limit_detection="])
+        opts, args = getopt.getopt(sys.argv[1:], "ht:m:o:l:c", ["help=", "interval=", "model=", "mode=", "metric=", "limit_detection=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python predict_seuil_expe_maxwell.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+        print('python predict_seuil_expe_maxwell.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python predict_seuil_expe_maxwell.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+            print('python predict_seuil_expe_maxwell.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
             sys.exit()
         elif o in ("-t", "--interval"):
             p_interval = a
@@ -58,13 +60,14 @@ def main():
             p_metric = a
         elif o in ("-l", "--limit_detection"):
             p_limit = int(a)
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
     scenes = os.listdir(scenes_path)
 
-    if min_max_filename in scenes:
-        scenes.remove(min_max_filename)
+    scenes = [s for s in scenes if s in maxwell_scenes]
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
@@ -124,7 +127,7 @@ def main():
                 img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
 
                 current_img = Image.open(img_path)
-                img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
 
                 check_all_done = all(d == True for d in threshold_expes_detected)
@@ -137,10 +140,15 @@ def main():
                         tmp_file_path = tmp_filename.replace('__model__',  p_model_file.split('/')[-1].replace('.joblib', '_'))
                         block.save(tmp_file_path)
 
-                        python_cmd = "python metrics_predictions/predict_noisy_image_svd_" + p_metric + ".py --image " + tmp_file_path + \
+                        python_cmd = "python predict_noisy_image_svd.py --image " + tmp_file_path + \
                                         " --interval '" + p_interval + \
                                         "' --model " + p_model_file  + \
-                                        " --mode " + p_mode
+                                        " --mode " + p_mode + \
+                                        " --metric " + p_metric
+
+                        # specify use of custom file for min max normalization
+                        if p_custom:
+                            python_cmd = python_cmd + ' --custom ' + p_custom
 
                         ## call command ##
                         p = subprocess.Popen(python_cmd, stdout=subprocess.PIPE, shell=True)

+ 25 - 15
predict_seuil_expe_maxwell_curve.py

@@ -2,7 +2,7 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import image_processing
+from ipfml import processing
 from PIL import Image
 
 import sys, os, getopt
@@ -30,19 +30,21 @@ current_dirpath = os.getcwd()
 
 def main():
 
+    p_custom = False
+
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python predict_seuil_expe_maxwell.py --interval "0,20" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+        print('python predict_seuil_expe_maxwell_curve.py --interval "0,20" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ht:m:o:l", ["help=", "interval=", "model=", "mode=", "metric=", "limit_detection="])
+        opts, args = getopt.getopt(sys.argv[1:], "ht:m:o:l:c", ["help=", "interval=", "model=", "mode=", "metric=", "limit_detection=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python predict_seuil_expe_maxwell.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+        print('python predict_seuil_expe_maxwell_curve.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python predict_seuil_expe_maxwell.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx')
+            print('python predict_seuil_expe_maxwell_curve.py --interval "xx,xx" --model path/to/xxxx.joblib --mode svdn --metric lab --limit_detection xx --custom min_max_filename')
             sys.exit()
         elif o in ("-t", "--interval"):
             p_interval = a
@@ -58,13 +60,16 @@ def main():
             p_metric = a
         elif o in ("-l", "--limit_detection"):
             p_limit = int(a)
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
     scenes = os.listdir(scenes_path)
 
-    if min_max_filename in scenes:
-        scenes.remove(min_max_filename)
+    scenes = [s for s in scenes if s in maxwell_scenes]
+
+    print(scenes)
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
@@ -122,7 +127,7 @@ def main():
                 img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
 
                 current_img = Image.open(img_path)
-                img_blocks = image_processing.divide_in_blocks(current_img, (200, 200))
+                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
                 for id_block, block in enumerate(img_blocks):
 
@@ -132,10 +137,15 @@ def main():
                         tmp_file_path = tmp_filename.replace('__model__',  p_model_file.split('/')[-1].replace('.joblib', '_'))
                         block.save(tmp_file_path)
 
-                        python_cmd = "python metrics_predictions/predict_noisy_image_svd_" + p_metric + ".py --image " + tmp_file_path + \
+                        python_cmd = "python predict_noisy_image_svd.py --image " + tmp_file_path + \
                                         " --interval '" + p_interval + \
                                         "' --model " + p_model_file  + \
-                                        " --mode " + p_mode
+                                        " --mode " + p_mode + \
+                                        " --metric " + p_metric
+
+                        # specify use of custom file for min max normalization
+                        if p_custom:
+                            python_cmd = python_cmd + ' --custom ' + p_custom
 
                         ## call command ##
                         p = subprocess.Popen(python_cmd, stdout=subprocess.PIPE, shell=True)
@@ -160,13 +170,13 @@ def main():
             # end of scene => display of results
 
             # construct path using model name for saving threshold map folder
-            model_treshold_path = os.path.join(threshold_map_folder, p_model_file.split('/')[-1].replace('.joblib', ''))
+            model_threshold_path = os.path.join(threshold_map_folder, p_model_file.split('/')[-1].replace('.joblib', ''))
 
             # create threshold model path if necessary
-            if not os.path.exists(model_treshold_path):
-                os.makedirs(model_treshold_path)
+            if not os.path.exists(model_threshold_path):
+                os.makedirs(model_threshold_path)
 
-            map_filename = os.path.join(model_treshold_path, simulation_curves_zones + folder_scene)
+            map_filename = os.path.join(model_threshold_path, simulation_curves_zones + folder_scene)
             f_map = open(map_filename, 'w')
 
             for line in block_predictions_str:
@@ -176,7 +186,7 @@ def main():
             print("Scene " + str(id_scene + 1) + "/" + str(len(maxwell_scenes)) + " Done..")
             print("------------------------")
 
-            print("Model predictions are saved into %s" map_filename)
+            print("Model predictions are saved into %s" % map_filename)
             time.sleep(10)
 
 

+ 2 - 3
run_maxwell_simulation.sh

@@ -32,7 +32,6 @@ for size in {"4","8","16","26","32","40"}; do
                      for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
                         FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
-
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
                         if grep -q "${MODEL_NAME}" "${simulate_models}"; then
@@ -41,9 +40,9 @@ for size in {"4","8","16","26","32","40"}; do
                             # by default regenerate model
                             python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
 
-                            python models/${model}_train.py --data ${FILENAME} --output ${MODEL_NAME}
+                            python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
-                            python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2'
+                            python predict_seuil_expe_maxwell_curve.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2'
 
                             python save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
 

+ 63 - 0
run_maxwell_simulation_custom.sh

@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+VECTOR_SIZE=200
+
+for size in {"4","8","16","26","32","40"}; do
+    for metric in {"lab","mscn","mscn_revisited","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+
+        half=$(($size/2))
+        start=-$half
+
+        for counter in {0..4}; do
+             end=$(($start+$size))
+
+             if [ "$end" -gt "$VECTOR_SIZE" ]; then
+                 start=$(($VECTOR_SIZE-$size))
+                 end=$(($VECTOR_SIZE))
+             fi
+
+             if [ "$start" -lt "0" ]; then
+                 start=$((0))
+                 end=$(($size))
+             fi
+
+             for nb_zones in {4,6,8,10,12,14}; do
+
+                 for mode in {"svd","svdn","svdne"}; do
+                     for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                        FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                        MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                        CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
+
+                        if grep -q "${MODEL_NAME}" "${simulate_models}"; then
+                            echo "Run simulation for model ${MODEL_NAME}"
+
+                            # by default regenerate model
+                            python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --norm 0 --sep ';' --rowindex '0' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                            python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                            python predict_seuil_expe_maxwell_curve.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                            python save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+
+                        fi
+                    done
+                done
+            done
+
+            if [ "$counter" -eq "0" ]; then
+                start=$(($start+50-$half))
+            else
+                start=$(($start+50))
+            fi
+
+        done
+    done
+done
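
The new script sweeps five `[start, end]` windows of each `size` across the 200-component SVD vector, clamping at both borders and shortening the first shift by `half` so the sequence begins at the vector's start. A minimal Python sketch of that windowing arithmetic (not part of the repository; shown only to make the generated intervals explicit):

```python
# Reproduce the start/end interval sequence of the bash loop above.
VECTOR_SIZE = 200

def interval_windows(size, vector_size=VECTOR_SIZE, steps=5, stride=50):
    half = size // 2
    start = -half
    windows = []
    for counter in range(steps):
        end = start + size
        if end > vector_size:            # clamp at the right border
            start, end = vector_size - size, vector_size
        if start < 0:                    # clamp at the left border
            start, end = 0, size
        windows.append((start, end))
        # the first shift is reduced by half, later shifts are full stride
        start += (stride - half) if counter == 0 else stride
    return windows

print(interval_windows(4))
# [(0, 4), (48, 52), (98, 102), (148, 152), (196, 200)]
```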

+ 1 - 1
save_model_result_in_md.py

@@ -2,7 +2,7 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import image_processing
+from ipfml import processing
 from PIL import Image
 
 import sys, os, getopt

+ 3 - 2
save_model_result_in_md_maxwell.py

@@ -7,7 +7,7 @@ from sklearn.model_selection import train_test_split
 import numpy as np
 import pandas as pd
 
-from ipfml import image_processing
+from ipfml import processing
 from PIL import Image
 
 import sys, os, getopt
@@ -53,7 +53,7 @@ def main():
 
             if p_mode != 'svdn' and p_mode != 'svdne' and p_mode != 'svd':
                 assert False, "Mode not recognized"
-        elif o in ("-c", "--metric"):
+        elif o in ("-m", "--metric"):
             p_metric = a
         else:
             assert False, "unhandled option"
@@ -64,6 +64,7 @@ def main():
     begin, end = p_interval
 
     bash_cmd = "bash testModelByScene_maxwell.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_metric + "'"
+
     print(bash_cmd)
 
     ## call command ##
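
The `-c` → `-m` change makes the short flag tested in the option loop match the one `--metric` is declared under in the script's `getopt` call (`-c` looks like a leftover from a copied option branch). A minimal sketch of the pairing this restores; the spec string `"hi:m:o:"` below is an assumption, since the file's actual `getopt` call is not part of this hunk:

```python
# Minimal sketch: a short option must appear in the getopt spec
# (with a trailing ':' when it expects a value) to be matched here.
import getopt, sys

opts, args = getopt.getopt(sys.argv[1:], "hi:m:o:",
                           ["help", "interval=", "metric=", "mode="])
for o, a in opts:
    if o in ("-m", "--metric"):   # '-m' now matches 'm:' in the spec
        p_metric = a
```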

+ 8 - 1
testModelByScene_maxwell.sh

@@ -35,6 +35,13 @@ if [ -z "$5" ]
     exit 1
 fi
 
+if [ -z "$6" ]
+  then
+    echo "No sixth argument supplied"
+fi
+
+
+
 INPUT_BEGIN=$1
 INPUT_END=$2
 INPUT_MODEL=$3
@@ -60,4 +67,4 @@ for scene in {"A","D","G","H"}; do
 
   python prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}
 
-done
+done

+ 25 - 31
models/svm_model_train.py

@@ -13,46 +13,41 @@ import numpy as np
 import pandas as pd
 import sys, os, getopt
 
-saved_models_folder = 'saved_models'
-current_dirpath = os.getcwd()
-output_model_folder = os.path.join(current_dirpath, saved_models_folder)
-
-def get_best_model(X_train, y_train):
-
-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
+from modules.utils import config as cfg
+from modules import models as mdl
 
-    svc = svm.SVC(probability=True)
-    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=10)
+saved_models_folder = cfg.saved_models_folder
+models_list         = cfg.models_names_list
 
-    clf.fit(X_train, y_train)
-
-    model = clf.best_estimator_
-
-    return model
+current_dirpath = os.getcwd()
+output_model_folder = os.path.join(current_dirpath, saved_models_folder)
 
 
 def main():
 
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python svm_model_train.py --data xxxx --output xxxx')
+    if len(sys.argv) <= 2:
+        print('python train_model.py --data xxxx --output xxxx --choice svm_model')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "hd:o", ["help=", "data=", "output="])
+        opts, args = getopt.getopt(sys.argv[1:], "hd:o:c:", ["help=", "data=", "output=", "choice="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python svm_model_train.py --data xxxx --output xxxx')
+        print('python train_model.py --data xxxx --output xxxx --choice svm_model')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python svm_model_train.py --data xxxx --output xxxx')
+            print('python train_model.py --data xxxx --output xxxx --choice svm_model')
             sys.exit()
         elif o in ("-d", "--data"):
             p_data_file = a
         elif o in ("-o", "--output"):
             p_output = a
+        elif o in ("-c", "--choice"):
+            p_choice = a
+
+            if p_choice not in models_list:
+                assert False, "Unknown model choice"
+
         else:
             assert False, "unhandled option"
 
@@ -99,15 +94,14 @@ def main():
     # 2. Construction of the model : Ensemble model structure
     #######################
 
-    svm_model = get_best_model(x_dataset_train, y_dataset_train)
+    print("-------------------------------------------")
+    print("Train dataset size: ", final_df_train_size)
+    model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
 
     #######################
     # 3. Fit model : use of cross validation to fit model
     #######################
-    print("-------------------------------------------")
-    print("Train dataset size: ", final_df_train_size)
-    svm_model.fit(x_dataset_train, y_dataset_train)
-    val_scores = cross_val_score(svm_model, x_dataset_train, y_dataset_train, cv=5)
+    val_scores = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
     print("Accuracy: %0.2f (+/- %0.2f)" % (val_scores.mean(), val_scores.std() * 2))
 
     ######################
@@ -126,8 +120,8 @@ def main():
 
     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
 
-    y_test_model = svm_model.predict(X_test)
-    y_val_model = svm_model.predict(X_val)
+    y_test_model = model.predict(X_test)
+    y_val_model = model.predict(X_val)
 
     val_accuracy = accuracy_score(y_val, y_val_model)
     test_accuracy = accuracy_score(y_test, y_test_model)
@@ -147,15 +141,15 @@ def main():
     print("Validation: ", val_accuracy)
     print("Test F1: ", test_f1)
 
+
     ##################
     # 6. Save model : create path if not exists
     ##################
 
-    # create path if not exists
     if not os.path.exists(saved_models_folder):
         os.makedirs(saved_models_folder)
 
-    joblib.dump(svm_model, output_model_folder + '/' + p_output + '.joblib')
+    joblib.dump(model, output_model_folder + '/' + p_output + '.joblib')
 
 if __name__== "__main__":
     main()
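
The grid search removed here moves behind the new `modules/models.py` interface (`mdl.get_trained_model(p_choice, x, y)`), so a single `train_model.py` can serve every entry in `cfg.models_names_list`. A plausible sketch of that dispatcher: only the `svm_model` branch is grounded in the deleted `get_best_model` above; the dispatch structure and the ensemble branches are assumptions:

```python
# modules/models.py (sketch): pick and fit a model by its name.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

def _best_svm_model(X_train, y_train):
    # grid-searched RBF SVM, as in the removed get_best_model above
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
    param_grid = {'kernel': ['rbf'], 'C': Cs, 'gamma': gammas}

    svc = svm.SVC(probability=True)
    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=10)
    clf.fit(X_train, y_train)

    # best_estimator_ is already refit on the full training data
    return clf.best_estimator_

def get_trained_model(choice, X_train, y_train):
    if choice == 'svm_model':
        return _best_svm_model(X_train, y_train)
    # 'ensemble_model' and 'ensemble_model_v2' would dispatch here too
    raise ValueError("Unknown model choice: %s" % choice)
```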