Browse source

Merge branch 'release/v0.2.0'

Jérôme BUISINE 5 years ago
commit 55ed948965
54 files changed with 5441 additions and 411 deletions
  1. +5 -3      .gitignore
  2. +650 -0    analysis/corr_analysys.ipynb
  3. +476 -0    analysis/mscn_analysis.ipynb
  4. +26 -0     analysis/save_img_block.py
  5. +303 -0    analysis/svd_entropy_analysis.ipynb
  6. +0 -0      analysis/svd_mean_rotations_view.py
  7. +537 -0    analysis/svd_reconstruction_analysis.ipynb
  8. +0 -0      analysis/svd_rotation_view.py
  9. +205 -0    analysis/svd_scenes_analysis.ipynb
  10. +332 -0   analysis/svd_zones_analysis.ipynb
  11. +59 -42   cnn_keras_svd.py
  12. +46 -28   display_simulation_curves.py
  13. +310 -0   display_svd_area_scenes.py
  14. +323 -0   display_svd_data_error_scene.py
  15. +29 -63   display_svd_data_scene.py
  16. +16 -0    fichiersSVD_light/Cuisine01/test_cut.py
  17. BIN       fichiersSVD_light/SdbDroite/sceneSDB_Droite.zip
  18. +1 -1     generateAndTrain_maxwell.sh
  19. +1 -1     generateAndTrain_maxwell_custom.sh
  20. +74 -0    generateAndTrain_maxwell_custom_center.sh
  21. +74 -0    generateAndTrain_maxwell_custom_split.sh
  22. +79 -93   generate_data_model.py
  23. +385 -0   generate_data_model_corr_random.py
  24. +90 -105  generate_data_model_random.py
  25. +314 -0   generate_data_model_random_center.py
  26. +313 -0   generate_data_model_random_split.py
  27. +7 -0     generate_metrics_curve.sh
  28. +7 -2     modules/utils/config.py
  29. +66 -2    modules/utils/data.py
  30. +64 -42   predict_noisy_image_svd.py
  31. +44 -5    prediction_scene.py
  32. +7 -0     runAll_display_data_scene.sh
  33. +1 -1     runAll_maxwell.sh
  34. +1 -1     runAll_maxwell_area.sh
  35. +52 -0    runAll_maxwell_area_normed.sh
  36. +56 -0    runAll_maxwell_corr_custom.sh
  37. +1 -1     runAll_maxwell_custom.sh
  38. +24 -0    runAll_maxwell_custom_center.sh
  39. +24 -0    runAll_maxwell_custom_split.sh
  40. +55 -0    runAll_maxwell_keras.sh
  41. +54 -0    runAll_maxwell_keras_corr.sh
  42. +54 -0    runAll_maxwell_keras_corr_custom.sh
  43. +56 -0    runAll_maxwell_mscn_var.sh
  44. +1 -1     runAll_maxwell_sub_blocks_stats.sh
  45. +1 -1     runAll_maxwell_sub_blocks_stats_reduced.sh
  46. +1 -1     run_maxwell_simulation.sh
  47. +43 -0    run_maxwell_simulation_corr_custom.sh
  48. +1 -1     run_maxwell_simulation_custom.sh
  49. +43 -0    run_maxwell_simulation_keras_corr_custom.sh
  50. +38 -0    run_maxwell_simulation_keras_custom.sh
  51. +77 -17   save_model_result_in_md_maxwell.py
  52. +5 -0     simulate_models.csv
  53. +5 -0     simulate_models_keras.csv
  54. +5 -0     simulate_models_keras_corr.csv

+ 5 - 3
.gitignore

@@ -4,8 +4,11 @@ saved_models/*
 threshold_map/*
 models_info/*
 custom_norm/*
+learned_zones/*
+corr_indices/*
+.ipynb_checkpoints

-simulate_models.csv
+# simulate_models.csv

 fichiersSVD_light/*/*/*.csv
 fichiersSVD_light/*_min_max_values
@@ -14,8 +17,7 @@ fichiersSVD_light/*_min_max_values
 __pycache__

 # by default avoid model files and png files
-*.h5
+saved_models/*.h5
 *.png
-!saved_models/*.h5
 !saved_models/*.png
 .vscode

File diff suppressed because it is too large
+ 650 - 0
analysis/corr_analysys.ipynb


File diff suppressed because it is too large
+ 476 - 0
analysis/mscn_analysis.ipynb


+ 26 - 0
analysis/save_img_block.py

@@ -0,0 +1,26 @@
+from ipfml import processing, utils
+from skimage import transform
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+from PIL import Image
+
+data_folder = "../fichiersSVD_light"
+
+scene   = 'Cuisine01'
+mean_svd_values = []
+indices = ["00050", "00300", "01200"]
+id_block = 10
+
+def get_block_image(image_path):
+    image = Image.open(image_path)
+    blocks = processing.divide_in_blocks(image, (200, 200))
+    return blocks[id_block]
+
+for index in indices:
+    path = os.path.join(data_folder, scene + '/cuisine01_' + index + '.png')
+    img_block = get_block_image(path)
+    img_block.save(scene + '_' + str(index) + '_' + str(id_block) + '.png')
+
+
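For readers without ipfml at hand, here is a minimal numpy sketch of the block indexing this script relies on, assuming processing.divide_in_blocks cuts the image into non-overlapping 200x200 tiles enumerated in row-major order (an assumption about ipfml, with an 800x800 image used purely for illustration):

    import numpy as np

    # Hypothetical 800x800 grayscale image: a 4x4 grid of 200x200 blocks.
    image = np.random.randint(0, 256, (800, 800), dtype=np.uint8)

    h, w = image.shape
    bh, bw = 200, 200

    # Cut into non-overlapping tiles, enumerated in row-major order
    # (assumed to mirror ipfml's processing.divide_in_blocks).
    blocks = (image.reshape(h // bh, bh, w // bw, bw)
                   .swapaxes(1, 2)
                   .reshape(-1, bh, bw))

    print(blocks.shape)    # (16, 200, 200)
    block_10 = blocks[10]  # under that assumption: row 2, column 2 of the grid

Under that row-major assumption, id_block = 10 addresses the third block of the third row of a 4x4 grid.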

+ 303 - 0
analysis/svd_entropy_analysis.ipynb

@@ -0,0 +1,303 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ipfml import processing\n",
+    "from ipfml import utils\n",
+    "from ipfml import metrics\n",
+    "from PIL import Image\n",
+    "from scipy import signal\n",
+    "from skimage import color\n",
+    "import scipy.stats as stats\n",
+    "import seaborn as sns\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_folder = \"../fichiersSVD_light\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SVD analysis on zones of Synthesis Images "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Utils functions definition"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_images_path(dict_data):\n",
+    "    scene = dict_data['name']\n",
+    "    prefix = dict_data['prefix']\n",
+    "    indices = dict_data['indices']\n",
+    "    \n",
+    "    images_path = []\n",
+    "    for index in indices:\n",
+    "        path = os.path.join(data_folder, os.path.join(scene, prefix + index + \".png\"))\n",
+    "        print(path)\n",
+    "        images_path.append(path)\n",
+    "    return images_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_images_zones(dict_data, images_path):\n",
+    "    \n",
+    "    zones_indices = dict_data['zones']\n",
+    "    zones_img = []\n",
+    "    \n",
+    "    for path in images_path:\n",
+    "        img = Image.open(path)\n",
+    "        zones = processing.divide_in_blocks(img, (200, 200))\n",
+    "        \n",
+    "        zones_list = []\n",
+    "        \n",
+    "        for id_zone in zones_indices:\n",
+    "            zones_list.append(zones[id_zone])\n",
+    "            \n",
+    "        zones_img.append(zones_list)\n",
+    "        \n",
+    "    return zones_img"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def display_sv_data(dict_data, zones_data, interval, _norm=False):\n",
+    "    \n",
+    "    scene_name = dict_data['name']\n",
+    "    image_indices = dict_data['indices']\n",
+    "    zones_indices = dict_data['zones']\n",
+    "    colors = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']\n",
+    "    \n",
+    "    plt.figure(figsize=(25, 20))\n",
+    "    \n",
+    "    sv_data = []\n",
+    "    begin, end = interval\n",
+    "    for id_img, zones in enumerate(zones_data):\n",
+    "        \n",
+    "        for id_zone, zone in enumerate(zones):\n",
+    "            U, s, V = processing.get_LAB_L_SVD(zone)\n",
+    "        \n",
+    "            data = s[begin:end]\n",
+    "            \n",
+    "            if _norm:\n",
+    "                data = utils.normalize_arr(data)\n",
+    "                \n",
+    "            plt.plot(data, \n",
+    "                     color=colors[id_zone], \n",
+    "                     label='Zone ' + str(zones_indices[id_zone]) + ' of ' + scene_name + '_' + str(image_indices[id_img]))\n",
+    "            \n",
+    "    plt.legend(fontsize=18)\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Useful function\n",
+    "\n",
+    "def get_highest_values(arr, n):\n",
+    "    return np.array(arr).argsort()[-n:][::-1]\n",
+    "\n",
+    "def get_lowest_values(arr, n):\n",
+    "    return np.array(arr).argsort()[::-1][-n:][::-1]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Scenes zones data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# start 00020 - ref 00900 - step 10\n",
+    "dict_appart = {'name': 'Appart1opt02', \n",
+    "               'prefix': 'appartAopt_', \n",
+    "               'indices': [\"00020\", \"00200\", \"00900\"],\n",
+    "               'zones': [3, 6]}\n",
+    "\n",
+    "# start 00050 - ref 01200 - step 10\n",
+    "dict_cuisine = {'name': 'Cuisine01', \n",
+    "               'prefix': 'cuisine01_', \n",
+    "               'indices': [\"00050\", \"00400\", \"01200\"],\n",
+    "               'zones': [3, 6]}\n",
+    "\n",
+    "# start 00020 - ref 00950 - step 10\n",
+    "dict_sdb_c = {'name': 'SdbCentre', \n",
+    "               'prefix': 'SdB2_', \n",
+    "               'indices': [\"00020\", \"00400\", \"00950\"],\n",
+    "               'zones': [3, 6]}\n",
+    "\n",
+    "# start 00020 - ref 00950 - step 10\n",
+    "dict_sdb_d = {'name': 'SdbDroite', \n",
+    "               'prefix': 'SdB2_D_', \n",
+    "               'indices': [\"00020\", \"00400\", \"00950\"],\n",
+    "               'zones': [2, 3, 10, 13]}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "current_dict = dict_sdb_d\n",
+    "interval = (30, 200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "../fichiersSVD_light/SdbDroite/SdB2_D_00020.png\n",
+      "../fichiersSVD_light/SdbDroite/SdB2_D_00400.png\n",
+      "../fichiersSVD_light/SdbDroite/SdB2_D_00950.png\n"
+     ]
+    }
+   ],
+   "source": [
+    "images_path = compute_images_path(current_dict)\n",
+    "zones_data = get_images_zones(current_dict, images_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "first_image = zones_data[0][1]\n",
+    "entropy_contribution_data = []\n",
+    "\n",
+    "sv = processing.get_LAB_L_SVD_s(zone)\n",
+    "sv = utils.normalize_arr(sv)\n",
+    "entropy = utils.get_entropy(sv)\n",
+    "\n",
+    "for i in range(200):\n",
+    "    entropy_without_column = utils.get_entropy_without_i(sv, i)\n",
+    "    entropy_contribution_column = entropy - entropy_without_column\n",
+    "    entropy_contribution_data.append(entropy_contribution_column)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,\n",
+       "        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,\n",
+       "        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,\n",
+       "        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,\n",
+       "        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,\n",
+       "        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,\n",
+       "        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,\n",
+       "        92,  93,  94,  95,  96,  97,  98,  99, 100])"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_highest_values(entropy_contribution_data, 100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([  0, 199, 198, 197, 196, 195, 194, 193, 192, 191, 190, 189, 188,\n",
+       "       187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175,\n",
+       "       174, 173, 172, 171, 170, 169, 168, 167, 166, 165, 164, 163, 162,\n",
+       "       161, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149,\n",
+       "       148, 147, 146, 145, 144, 143, 142, 141, 140, 139, 138, 137, 136,\n",
+       "       135, 134, 133, 132, 131, 130, 129, 128, 127, 126, 125, 124, 123,\n",
+       "       122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110,\n",
+       "       109, 108, 107, 106, 105, 104, 103, 102, 101])"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_lowest_values(entropy_contribution_data, 100)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "thesis-venv",
+   "language": "python",
+   "name": "thesis-venv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
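The leave-one-out entropy cells above delegate to ipfml's utils.get_entropy and utils.get_entropy_without_i. A self-contained numpy sketch of what is presumably computed, assuming these helpers return the Shannon entropy of the normalized singular values treated as a probability distribution (an assumption about ipfml, not confirmed here):

    import numpy as np

    def shannon_entropy(values):
        # Turn the non-negative values into a probability distribution.
        p = np.asarray(values, dtype=float)
        p = p / p.sum()
        p = p[p > 0]
        return -np.sum(p * np.log2(p))

    # Hypothetical normalized singular values of one 200x200 zone.
    sv = np.sort(np.random.rand(200))[::-1]

    entropy = shannon_entropy(sv)

    # Leave-one-out contribution of each singular value to the total entropy,
    # mirroring the entropy - get_entropy_without_i(sv, i) loop above.
    contributions = [entropy - shannon_entropy(np.delete(sv, i))
                     for i in range(len(sv))]

The two argsort helpers defined earlier in the notebook then simply rank these contributions.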

SVDAnalysis/svd_mean_rotations_view.py → analysis/svd_mean_rotations_view.py


File diff suppressed because it is too large
+ 537 - 0
analysis/svd_reconstruction_analysis.ipynb


SVDAnalysis/svd_roration_view.py → analysis/svd_rotation_view.py


File diff suppressed because it is too large
+ 205 - 0
analysis/svd_scenes_analysis.ipynb


File diff suppressed because it is too large
+ 332 - 0
analysis/svd_zones_analysis.ipynb


+ 59 - 42
cnn_keras_svd.py

@@ -2,28 +2,27 @@ from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
 from keras.layers import Conv1D, MaxPooling1D
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras.wrappers.scikit_learn import KerasClassifier
 from keras import backend as K
-import matplotlib.pyplot as plt

 from sklearn.utils import shuffle
+from sklearn.metrics import roc_auc_score

 import numpy as np
 import pandas as pd

 from ipfml import processing
+import modules.utils.config as cfg
+
 from PIL import Image

-import sys, os, getopt
+import sys, os
+import argparse
+import json
+
 import subprocess
 import time

-vector_size = 100
-epochs = 100
-batch_size = 24
-
-input_shape = (vector_size, 1)
-filename = "svd_model"
-
 def f1(y_true, y_pred):
     def recall(y_true, y_pred):
         """Recall metric.
@@ -54,7 +53,7 @@ def f1(y_true, y_pred):
     recall = recall(y_true, y_pred)
     return 2*((precision*recall)/(precision+recall+K.epsilon()))

-def generate_model():
+def generate_model(input_shape):

     model = Sequential()

@@ -90,35 +89,35 @@

     model.add(Flatten(input_shape=input_shape))

-    #model.add(Dense(2048))
-    #model.add(Activation('relu'))
-    #model.add(BatchNormalization())
-    #model.add(Dropout(0.3))
+    model.add(Dense(2048))
+    model.add(Activation('relu'))
+    model.add(BatchNormalization())
+    model.add(Dropout(0.2))

     model.add(Dense(1024))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.4))
+    model.add(Dropout(0.2))

     model.add(Dense(512))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.4))
+    model.add(Dropout(0.3))

     model.add(Dense(256))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.4))
+    model.add(Dropout(0.3))

     model.add(Dense(128))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.4))
+    model.add(Dropout(0.3))

     model.add(Dense(20))
     model.add(Activation('relu'))
     model.add(BatchNormalization())
-    model.add(Dropout(0.4))
+    model.add(Dropout(0.3))

     model.add(Dense(1))
     model.add(Activation('sigmoid'))
@@ -131,24 +130,22 @@ def generate_model():

 def main():

-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python save_model_result_in_md.py --data filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hd", ["help=", "data="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python save_model_result_in_md.py --data filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python save_model_result_in_md.py --data filename')
-            sys.exit()
-        elif o in ("-d", "--data"):
-            p_datafile = a
-        else:
-            assert False, "unhandled option"
+    parser = argparse.ArgumentParser(description="Process deep_network_keras_svd.py parameters")
+
+    parser.add_argument('--data', type=str, help='Data filename prefix to access train and test dataset')
+    parser.add_argument('--output', type=str, help='Name of filename to save model into')
+    parser.add_argument('--size', type=int, help='Size of input data vector')
+
+    args = parser.parse_args()
+
+    p_datafile = args.data
+    p_output_filename = args.output
+    p_vector_size = args.size
+
+    epochs = 10
+    batch_size = cfg.keras_batch
+
+    input_shape = (p_vector_size, 1)

     ###########################
     # 1. Get and prepare data
@@ -190,21 +187,41 @@ def main():
     # 2. Getting model
     #######################

-    model = generate_model()
+    model = generate_model(input_shape)
     model.summary()
+    #model = KerasClassifier(build_fn=model, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, verbose=0)

     #######################
     # 3. Fit model : use of cross validation to fit model
     #######################

     # reshape input data
-    x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), vector_size, 1)
-    x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), vector_size, 1)
+    x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), p_vector_size, 1)
+    x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), p_vector_size, 1)

-    model.fit(x_dataset_train, y_dataset_train, epochs=epochs, batch_size=batch_size, validation_split=0.20)
+    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)

     score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)
-    print(score)
+
+    if not os.path.exists(cfg.saved_models_folder):
+        os.makedirs(cfg.saved_models_folder)
+
+    # save the model into HDF5 file
+    model_output_path = os.path.join(cfg.saved_models_folder, p_output_filename + '.json')
+    json_model_content = model.to_json()
+
+    with open(model_output_path, 'w') as f:
+        print("Model saved into ", model_output_path)
+        json.dump(json_model_content, f, indent=4)
+
+    model.save_weights(model_output_path.replace('.json', '.h5'))
+
+    # Save results obtained from model
+    y_test_prediction = model.predict(x_dataset_test)
+    print("Metrics : ", model.metrics_names)
+    print("Prediction : ", score)
+    print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))
+

 if __name__== "__main__":
     main()

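Since the new code above serializes the architecture with model.to_json() (written through json.dump) and the weights with save_weights, reloading the pair for later prediction would look roughly as follows; the path and compile settings are placeholders, as the script's own compile call is not part of this hunk:

    import json
    from keras.models import model_from_json

    # Hypothetical output name following the saving convention above.
    model_path = 'saved_models/svd_model.json'

    # json.dump() wrote the string returned by to_json(), so json.load()
    # yields that architecture string back.
    with open(model_path, 'r') as f:
        json_model = json.load(f)

    model = model_from_json(json_model)
    model.load_weights(model_path.replace('.json', '.h5'))

    # Placeholder compile settings (assumed, not shown in this diff).
    model.compile(loss='binary_crossentropy', optimizer='adam')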
+ 46 - 28
display_simulation_curves.py

@@ -2,20 +2,30 @@ import numpy as np
 import pandas as pd

 import matplotlib.pyplot as plt
-import os, sys, getopt
+import os, sys, argparse

 from modules.utils.data import get_svd_data

-label_freq = 6
+from modules.utils import config as cfg

-def display_curves(folder_path):
+learned_zones_folder = cfg.learned_zones_folder
+models_name          = cfg.models_names_list
+label_freq           = 6
+
+def display_curves(folder_path, model_name):
     """
     @brief Method used to display simulation given .csv files
     @param folder_path, folder which contains all .csv files obtained during simulation
+    @param model_name, current name of model
     @return nothing
     """

-    data_files = os.listdir(folder_path)
+    for name in models_name:
+        if name in model_name:
+            data_filename = model_name
+            learned_zones_folder_path = os.path.join(learned_zones_folder, data_filename)
+
+    data_files = [x for x in os.listdir(folder_path) if '.png' not in x]

     scene_names = [f.split('_')[3] for f in data_files]

@@ -24,6 +34,15 @@ def display_curves(folder_path):
         print(scene_names[id])
         path_file = os.path.join(folder_path, f)

+        scenes_zones_used_file_path = os.path.join(learned_zones_folder_path, scene_names[id] + '.csv')
+
+        zones_used = []
+
+        with open(scenes_zones_used_file_path, 'r') as f:
+            zones_used = [int(x) for x in f.readline().split(';') if x != '']
+
+        print(zones_used)
+
         df = pd.read_csv(path_file, header=None, sep=";")

         fig=plt.figure(figsize=(35, 22))
@@ -48,10 +67,18 @@ def display_curves(folder_path):
             fig.add_subplot(4, 4, (index + 1))
             plt.plot(row[5:])

+            if index in zones_used:
+                ax = plt.gca()
+                ax.set_facecolor((0.9, 0.95, 0.95))
+
             # draw vertical line from (70,100) to (70, 250)
             plt.plot([counter_index, counter_index], [-2, 2], 'k-', lw=2, color='red')
-            plt.ylabel('Not noisy / Noisy', fontsize=18)
-            plt.xlabel('Time in minutes / Samples per pixel', fontsize=16)
+
+            if index % 4 == 0:
+                plt.ylabel('Not noisy / Noisy', fontsize=20)
+
+            if index >= 12:
+                plt.xlabel('Samples per pixel', fontsize=20)

             x_labels = [id * step_value + start_index for id, val in enumerate(row[5:]) if id % label_freq == 0]

@@ -65,28 +92,19 @@ def display_curves(folder_path):

 def main():

-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python display_simulation_curves.py --folder "path"')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hm:s:k", ["help=", "folder="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python display_simulation_curves.py --folder "path"')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python display_simulation_curves.py --folder "path"')
-            sys.exit()
-        elif o in ("-f", "--folder"):
-            p_folder = a
-
-        else:
-            assert False, "unhandled option"
-
-
-    display_curves(p_folder)
+    parser = argparse.ArgumentParser(description="Display simulations curves from simulation data")
+
+    parser.add_argument('--folder', type=str, help='Folder which contains simulations data for scenes')
+    parser.add_argument('--model', type=str, help='Name of the model used for simulations')
+
+    args = parser.parse_args()
+
+    p_folder = args.folder
+    p_model = args.model
+
+    display_curves(p_folder, p_model)
+
+    print(p_folder)

 if __name__== "__main__":
     main()

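The zones file consumed above is read as a single line of ';'-separated zone indices; a hypothetical round-trip illustrating that assumed format (folder and scene names are made up):

    import os

    # Hypothetical learned-zones file for one scene; the format is inferred
    # from the readline().split(';') parsing in display_curves.
    os.makedirs('learned_zones/svm_model_demo', exist_ok=True)
    path = 'learned_zones/svm_model_demo/Cuisine01.csv'

    with open(path, 'w') as f:
        f.write(';'.join(map(str, [3, 6, 10, 13])))

    with open(path, 'r') as f:
        zones_used = [int(x) for x in f.readline().split(';') if x != '']

    print(zones_used)  # [3, 6, 10, 13]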
+ 310 - 0
display_svd_area_scenes.py

@@ -0,0 +1,310 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+import ipfml.iqa.fr as fr_iqa
+
+from skimage import color
+
+import matplotlib.pyplot as plt
+from modules.utils.data import get_svd_data
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename     = cfg.config_filename
+zone_folder         = cfg.zone_folder
+min_max_filename    = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list         = cfg.scenes_names
+scenes_indices      = cfg.scenes_indices
+choices             = cfg.normalization_choices
+path                = cfg.dataset_path
+zones               = cfg.zones_indices
+seuil_expe_filename = cfg.seuil_expe_filename
+
+metric_choices      = cfg.metric_choices_labels
+
+max_nb_bits = 8
+
+integral_area_choices = ['trapz', 'simps']
+
+def get_area_under_curve(p_area, p_data):
+
+    noise_method = None
+    function_name = 'integral_area_' + p_area
+
+    try:
+        area_method = getattr(utils, function_name)
+    except AttributeError:
+        raise NotImplementedError("Error `{}` not implement `{}`".format(utils.__name__, function_name))
+
+    return area_method(p_data, dx=800)
+
+
+def display_svd_values(p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_area, p_ylim):
+    """
+    @brief Method which gives information about svd curves from zone of picture
+    @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
+    @param p_interval, interval [begin, end] of samples or minutes from render generation engine
+    @param p_metric, metric computed to show
+    @param p_mode, normalization's mode
+    @param p_norm, normalization or not of selected svd data
+    @param p_area, area method name to compute area under curve
+    @param p_ylim, ylim choice to better display of data
+    @return nothing
+    """
+
+    image_indices = []
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
+    data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
+
+    # Store all informations about scenes
+    scenes_area_data = []
+    scenes_images_indices = []
+    scenes_threshold_mean = []
+
+    # go ahead each scenes
+    for id_scene, folder_scene in enumerate(scenes):
+
+        max_value_svd = 0
+        min_value_svd = sys.maxsize
+
+        scene_path = os.path.join(path, folder_scene)
+
+        config_file_path = os.path.join(scene_path, config_filename)
+
+        with open(config_file_path, "r") as config_file:
+            last_image_name = config_file.readline().strip()
+            prefix_image_name = config_file.readline().strip()
+            start_index_image = config_file.readline().strip()
+            end_index_image = config_file.readline().strip()
+            step_counter = int(config_file.readline().strip())
+
+        # construct each zones folder name
+        zones_folder = []
+
+        # get zones list info
+        for index in zones:
+            index_str = str(index)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+
+            current_zone = "zone"+index_str
+            zones_folder.append(current_zone)
+
+        # store data information for current scene
+        images_data = []
+        images_indices = []
+        threshold_learned_zones = []
+
+        for id, zone_folder in enumerate(zones_folder):
+
+            # get threshold information
+            zone_path = os.path.join(scene_path, zone_folder)
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            # open treshold path and get this information
+            with open(path_seuil, "r") as seuil_file:
+                threshold_learned = int(seuil_file.readline().strip())
+                threshold_learned_zones.append(threshold_learned)
+
+        current_counter_index = int(start_index_image)
+        end_counter_index = int(end_index_image)
+
+        threshold_mean = np.mean(np.asarray(threshold_learned_zones))
+        threshold_image_found = False
+        scenes_threshold_mean.append(int(threshold_mean / p_step))
+
+        file_path = os.path.join(scene_path, prefix_image_name + "{}.png")
+
+        svd_data = []
+
+        while(current_counter_index <= end_counter_index):
+
+            current_counter_index_str = str(current_counter_index)
+
+            while len(start_index_image) > len(current_counter_index_str):
+                current_counter_index_str = "0" + current_counter_index_str
+
+            image_path = file_path.format(str(current_counter_index_str))
+            img = Image.open(image_path)
+
+            svd_values = get_svd_data(p_metric, img)
+
+            if p_norm:
+                svd_values = svd_values[begin_data:end_data]
+
+            # update min max values
+            min_value = svd_values.min()
+            max_value = svd_values.max()
+
+            if min_value < min_value_svd:
+                min_value_svd = min_value
+
+            if max_value > min_value_svd:
+                max_value_svd = max_value
+
+            # keep in memory used data
+            if current_counter_index % p_step == 0:
+                if current_counter_index >= begin_index and current_counter_index <= end_index:
+                    images_indices.append(current_counter_index_str)
+                    svd_data.append(svd_values)
+
+                if threshold_mean < int(current_counter_index) and not threshold_image_found:
+
+                    threshold_image_found = True
+                    threshold_image_zone = current_counter_index_str
+
+            current_counter_index += step_counter
+            print('%.2f%%' % (current_counter_index / end_counter_index * 100))
+            sys.stdout.write("\033[F")
+
+
+            # all indices of picture to plot
+        print("Scene %s : %s" % (folder_scene, images_indices))
+
+
+        scenes_images_indices.append(image_indices)
+
+        area_data = []
+
+        for id, data in enumerate(svd_data):
+
+            current_data = data
+
+            if not p_norm:
+                current_data = current_data[begin_data:end_data]
+
+            if p_mode == 'svdn':
+                current_data = utils.normalize_arr(current_data)
+
+            if p_mode == 'svdne':
+                current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
+
+            images_data.append(current_data)
+
+            # not use this script for 'sub_blocks_stats'
+            current_area = get_area_under_curve(p_area, current_data)
+            area_data.append(current_area)
+
+        scenes_area_data.append(area_data)
+
+    # display all data using matplotlib (configure plt)
+    plt.title('Scenes area interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + ']' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
+    plt.ylabel('Image samples or time (minutes) generation', fontsize=14)
+    plt.xlabel('Vector features', fontsize=16)
+
+    plt.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+
+    for id, area_data in enumerate(scenes_area_data):
+
+        threshold_id = 0
+        scene_name = scenes[id]
+        image_indices = scenes_images_indices[id]
+        threshold_image_zone = scenes_threshold_mean[id]
+
+        p_label = scene_name + '_' + str(images_indices[id])
+
+        threshold_id = scenes_threshold_mean[id]
+
+        print(p_label)
+        start_ylim, end_ylim = p_ylim
+
+        plt.plot(area_data, label=p_label)
+        #ax2.set_xticks(range(len(images_indices)))
+        #ax2.set_xticklabels(list(map(int, images_indices)))
+        if threshold_id != 0:
+            print("Plot threshold ", threshold_id)
+            plt.plot([threshold_id, threshold_id], [np.min(area_data), np.max(area_data)], 'k-', lw=2, color='red')
+
+
+    #start_ylim, end_ylim = p_ylim
+    #plt.ylim(start_ylim, end_ylim)
+
+    plt.show()
+
+def main():
+
+
+    # by default p_step value is 10 to enable all photos
+    p_step = 10
+    p_ylim = (0, 1)
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python display_svd_area_scenes.py --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:a:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "area=", "ylim="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python display_svd_area_scenes.py --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python display_svd_area_scenes.py --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+            sys.exit()
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Invalid metric choice"
+
+        elif o in ("-m", "--mode"):
+            p_mode = a
+
+            if p_mode not in choices:
+                assert False, "Invalid normalization choice, expected ['svd', 'svdn', 'svdne']"
+
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-a", "--area"):
+            p_area = a
+
+            if p_area not in integral_area_choices:
+                assert False, "Invalid area computation choices : %s " % integral_area_choices
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
+        else:
+            assert False, "unhandled option"
+
+    display_svd_values(p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_area, p_ylim)
+
+if __name__== "__main__":
+    main()

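get_area_under_curve resolves 'integral_area_' + p_area on ipfml.utils. Assuming those helpers are thin wrappers over the usual trapezoidal and Simpson rules (an assumption about ipfml), the equivalent numpy/scipy calls with the same dx spacing would be:

    import numpy as np
    from scipy.integrate import simps

    # Hypothetical SVD curve for one image (decaying singular values).
    curve = np.exp(-np.linspace(0.0, 5.0, 170))

    # Same dx=800 spacing the script passes to area_method.
    area_trapz = np.trapz(curve, dx=800)   # 'trapz' choice
    area_simps = simps(curve, dx=800)      # 'simps' choice

    print(area_trapz, area_simps)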
+ 323 - 0
display_svd_data_error_scene.py

@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+import ipfml.iqa.fr as fr_iqa
+
+from skimage import color
+
+import matplotlib.pyplot as plt
+from modules.utils.data import get_svd_data
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename     = cfg.config_filename
+zone_folder         = cfg.zone_folder
+min_max_filename    = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list         = cfg.scenes_names
+scenes_indices      = cfg.scenes_indices
+choices             = cfg.normalization_choices
+path                = cfg.dataset_path
+zones               = cfg.zones_indices
+seuil_expe_filename = cfg.seuil_expe_filename
+
+metric_choices      = cfg.metric_choices_labels
+
+max_nb_bits         = 8
+display_error       = False
+
+error_data_choices  = ['mae', 'mse', 'ssim', 'psnr']
+
+
+def get_error_distance(p_error, y_true, y_test):
+
+    noise_method = None
+    function_name = p_error
+
+    try:
+        error_method = getattr(fr_iqa, function_name)
+    except AttributeError:
+        raise NotImplementedError("Error `{}` not implement `{}`".format(fr_iqa.__name__, function_name))
+
+    return error_method(y_true, y_test)
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim):
+    """
+    @brief Method which gives information about svd curves from zone of picture
+    @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
+    @param p_interval, interval [begin, end] of samples or minutes from render generation engine
+    @param p_metric, metric computed to show
+    @param p_mode, normalization's mode
+    @param p_norm, normalization or not of selected svd data
+    @param p_error, error metric used to display
+    @param p_ylim, ylim choice to better display of data
+    @return nothing
+    """
+
+    max_value_svd = 0
+    min_value_svd = sys.maxsize
+
+    image_indices = []
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
+    data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
+
+    # go ahead each scenes
+    for id_scene, folder_scene in enumerate(scenes):
+
+        if p_scene == folder_scene:
+            scene_path = os.path.join(path, folder_scene)
+
+            config_file_path = os.path.join(scene_path, config_filename)
+
+            with open(config_file_path, "r") as config_file:
+                last_image_name = config_file.readline().strip()
+                prefix_image_name = config_file.readline().strip()
+                start_index_image = config_file.readline().strip()
+                end_index_image = config_file.readline().strip()
+                step_counter = int(config_file.readline().strip())
+
+            # construct each zones folder name
+            zones_folder = []
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+
+            images_data = []
+            images_indices = []
+
+            threshold_learned_zones = []
+
+            for id, zone_folder in enumerate(zones_folder):
+
+                # get threshold information
+
+                zone_path = os.path.join(scene_path, zone_folder)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                # open treshold path and get this information
+                with open(path_seuil, "r") as seuil_file:
+                    threshold_learned = int(seuil_file.readline().strip())
+                    threshold_learned_zones.append(threshold_learned)
+
+            current_counter_index = int(start_index_image)
+            end_counter_index = int(end_index_image)
+
+            threshold_mean = np.mean(np.asarray(threshold_learned_zones))
+            threshold_image_found = False
+
+            file_path = os.path.join(scene_path, prefix_image_name + "{}.png")
+
+            svd_data = []
+
+            while(current_counter_index <= end_counter_index):
+
+                current_counter_index_str = str(current_counter_index)
+
+                while len(start_index_image) > len(current_counter_index_str):
+                    current_counter_index_str = "0" + current_counter_index_str
+
+                image_path = file_path.format(str(current_counter_index_str))
+                img = Image.open(image_path)
+
+                svd_values = get_svd_data(p_metric, img)
+
+                if p_norm:
+                    svd_values = svd_values[begin_data:end_data]
+
+                # update min max values
+                min_value = svd_values.min()
+                max_value = svd_values.max()
+
+                if min_value < min_value_svd:
+                    min_value_svd = min_value
+
+                if max_value > min_value_svd:
+                    max_value_svd = max_value
+
+                # keep in memory used data
+                if current_counter_index % p_step == 0:
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
+                        svd_data.append(svd_values)
+
+                    if threshold_mean < int(current_counter_index) and not threshold_image_found:
+
+                        threshold_image_found = True
+                        threshold_image_zone = current_counter_index_str
+
+                current_counter_index += step_counter
+                print('%.2f%%' % (current_counter_index / end_counter_index * 100))
+                sys.stdout.write("\033[F")
+
+
+            # all indices of picture to plot
+            print(images_indices)
+
+            previous_data = []
+            error_data = [0.]
+
+            for id, data in enumerate(svd_data):
+
+                current_data = data
+
+                if not p_norm:
+                    current_data = current_data[begin_data:end_data]
+
+                if p_mode == 'svdn':
+                    current_data = utils.normalize_arr(current_data)
+
+                if p_mode == 'svdne':
+                    current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
+
+                images_data.append(current_data)
+
+                # use of whole image data for computation of ssim or psnr
+                if p_error == 'ssim' or p_error == 'psnr':
+                    image_path = file_path.format(str(current_id))
+                    current_data = np.asarray(Image.open(image_path))
+
+                if len(previous_data) > 0:
+
+                    current_error = get_error_distance(p_error, previous_data, current_data)
+                    error_data.append(current_error)
+
+                if len(previous_data) == 0:
+                    previous_data = current_data
+
+            # display all data using matplotlib (configure plt)
+            gridsize = (3, 2)
+
+            # fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(30, 22))
+            fig = plt.figure(figsize=(30, 22))
+            ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2)
+            ax2 = plt.subplot2grid(gridsize, (2, 0), colspan=2)
+
+
+            ax1.set_title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
+            ax1.set_ylabel('Image samples or time (minutes) generation', fontsize=14)
+            ax1.set_xlabel('Vector features', fontsize=16)
+
+            for id, data in enumerate(images_data):
+
+                if display_error:
+                    p_label = p_scene + '_' + str(images_indices[id]) + " | " + p_error + ": " + str(error_data[id])
+                else:
+                    p_label = p_scene + '_' + str(images_indices[id])
+
+                if images_indices[id] == threshold_image_zone:
+                    ax1.plot(data, label=p_label + " (threshold mean)", lw=4, color='red')
+                else:
+                    ax1.plot(data, label=p_label)
+
+            ax1.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+
+            start_ylim, end_ylim = p_ylim
+            ax1.set_ylim(start_ylim, end_ylim)
+
+            ax2.set_title(p_error + " information for whole step images")
+            ax2.set_ylabel(p_error + ' error')
+            ax2.set_xlabel('Number of samples per pixels or times')
+            ax2.set_xticks(range(len(images_indices)))
+            ax2.set_xticklabels(list(map(int, images_indices)))
+            ax2.plot(error_data)
+
+            plot_name = p_scene + '_' + p_metric + '_' + str(p_step) + '_' + p_mode + '_' + str(p_norm) + '.png'
+            plt.savefig(plot_name)
+
+def main():
+
+
+    # by default p_step value is 10 to enable all photos
+    p_step = 10
+    p_ylim = (0, 1)
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:e:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "error=", "ylim="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+            sys.exit()
+        elif o in ("-s", "--scene"):
+            p_scene = a
+
+            if p_scene not in scenes_indices:
+                assert False, "Invalid scene choice"
+            else:
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Invalid metric choice"
+
+        elif o in ("-m", "--mode"):
+            p_mode = a
+
+            if p_mode not in choices:
+                assert False, "Invalid normalization choice, expected ['svd', 'svdn', 'svdne']"
+
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-e", "--error"):
+            p_error = a
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
+        else:
+            assert False, "unhandled option"
+
+    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim)
+
+if __name__== "__main__":
+    main()

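get_error_distance looks up p_error by name on ipfml.iqa.fr; assuming mae and mse there follow the textbook definitions (an assumption about ipfml; ssim and psnr, as the code shows, are fed whole images rather than SVD vectors), a minimal numpy equivalent is:

    import numpy as np

    def mae(y_true, y_test):
        # Mean absolute error between two equally shaped arrays.
        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_test)))

    def mse(y_true, y_test):
        # Mean squared error between two equally shaped arrays.
        return np.mean((np.asarray(y_true) - np.asarray(y_test)) ** 2)

    # Hypothetical consecutive SVD vectors of one scene.
    previous = np.random.rand(170)
    current = previous + np.random.normal(0.0, 0.01, 170)

    print(mae(previous, current), mse(previous, current))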
+ 29 - 63
display_svd_data_scene.py

@@ -13,6 +13,7 @@ import numpy as np
 import random
 import random
 import time
 import time
 import json
 import json
+import math
 
 
 from PIL import Image
 from PIL import Image
 from ipfml import processing, metrics, utils
 from ipfml import processing, metrics, utils
@@ -20,9 +21,10 @@ import ipfml.iqa.fr as fr_iqa
 
 
 from skimage import color
 from skimage import color
 
 
+import matplotlib as mpl
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
-from modules.utils.data import get_svd_data
 
 
+from modules.utils.data import get_svd_data
 from modules.utils import config as cfg
 from modules.utils import config as cfg
 
 
 # getting configuration information
 # getting configuration information
@@ -40,25 +42,11 @@ seuil_expe_filename = cfg.seuil_expe_filename
 
 
 metric_choices      = cfg.metric_choices_labels
 metric_choices      = cfg.metric_choices_labels
 
 
-max_nb_bits = 8
-
-error_data_choices  = ['mae', 'mse', 'ssim', 'psnr']
-
-
-def get_error_distance(p_error, y_true, y_test):
-
-    noise_method = None
-    function_name = p_error
-
-    try:
-        error_method = getattr(fr_iqa, function_name)
-    except AttributeError:
-        raise NotImplementedError("Error `{}` not implement `{}`".format(fr_iqa.__name__, function_name))
-
-    return error_method(y_true, y_test)
+max_nb_bits         = 8
+display_error       = False
 
 
 
 
-def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim):
+def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_ylim):
     """
     """
     @brief Method which gives information about svd curves from zone of picture
     @brief Method which gives information about svd curves from zone of picture
     @param p_scene, scene expected to show svd values
     @param p_scene, scene expected to show svd values
@@ -67,7 +55,6 @@ def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step,
     @param p_metric, metric computed to show
     @param p_metric, metric computed to show
     @param p_mode, normalization's mode
     @param p_mode, normalization's mode
     @param p_norm, normalization or not of selected svd data
     @param p_norm, normalization or not of selected svd data
-    @param p_error, error metric used to display
     @param p_ylim, ylim choice to better display of data
     @param p_ylim, ylim choice to better display of data
     @return nothing
     @return nothing
     """
     """
@@ -155,6 +142,8 @@ def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step,
                 if p_norm:
                 if p_norm:
                     svd_values = svd_values[begin_data:end_data]
                     svd_values = svd_values[begin_data:end_data]
 
 
+                #svd_values = np.asarray([math.log(x) for x in svd_values])
+
                 # update min max values
                 # update min max values
                 min_value = svd_values.min()
                 min_value = svd_values.min()
                 max_value = svd_values.max()
                 max_value = svd_values.max()
@@ -184,9 +173,6 @@ def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step,
             # all indices of picture to plot
             # all indices of picture to plot
             print(images_indices)
             print(images_indices)
 
 
-            previous_data = []
-            error_data = [0.]
-
             for id, data in enumerate(svd_data):
             for id, data in enumerate(svd_data):
 
 
                 current_data = data
                 current_data = data
@@ -202,54 +188,37 @@ def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step,
 
 
                 images_data.append(current_data)
                 images_data.append(current_data)
 
 
-                # use of whole image data for computation of ssim or psnr
-                if p_error == 'ssim' or p_error == 'psnr':
-                    image_path = file_path.format(str(current_id))
-                    current_data = np.asarray(Image.open(image_path))
-
-                if len(previous_data) > 0:
-
-                    current_error = get_error_distance(p_error, previous_data, current_data)
-                    error_data.append(current_error)
-
-                if len(previous_data) == 0:
-                    previous_data = current_data
 
 
             # display all data using matplotlib (configure plt)
             # display all data using matplotlib (configure plt)
-            gridsize = (3, 2)
-
-            # fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(30, 22))
-            fig = plt.figure(figsize=(30, 22))
-            ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2)
-            ax2 = plt.subplot2grid(gridsize, (2, 0), colspan=2)
+            #fig = plt.figure(figsize=(30, 22))
+            fig, ax = plt.subplots(figsize=(30, 22))
+            ax.set_facecolor('#F9F9F9')
+            #fig.patch.set_facecolor('#F9F9F9')
 
 
+            ax.tick_params(labelsize=22)
+            #plt.rc('xtick', labelsize=22)
+            #plt.rc('ytick', labelsize=22)
 
 
-            ax1.set_title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + ']' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
-            ax1.set_ylabel('Image samples or time (minutes) generation', fontsize=14)
-            ax1.set_xlabel('Vector features', fontsize=16)
+            #plt.title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=24)
+            ax.set_ylabel('Component values', fontsize=30)
+            ax.set_xlabel('Vector features', fontsize=30)
 
 
             for id, data in enumerate(images_data):
             for id, data in enumerate(images_data):
 
 
-                p_label = p_scene + '_' + str(images_indices[id]) + " | " + p_error + ": " + str(error_data[id])
+                p_label = p_scene + '_' + str(images_indices[id])
 
 
                 if images_indices[id] == threshold_image_zone:
                 if images_indices[id] == threshold_image_zone:
-                    ax1.plot(data, label=p_label, lw=4, color='red')
+                    ax.plot(data, label=p_label + " (threshold mean)", lw=4, color='red')
                 else:
                 else:
-                    ax1.plot(data, label=p_label)
+                    ax.plot(data, label=p_label)
 
 
-            ax1.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+            plt.legend(bbox_to_anchor=(0.65, 0.98), loc=2, borderaxespad=0.2, fontsize=24)
 
 
             start_ylim, end_ylim = p_ylim
             start_ylim, end_ylim = p_ylim
-            ax1.set_ylim(start_ylim, end_ylim)
+            #ax.set_ylim(start_ylim, end_ylim)
 
 
-            ax2.set_title(p_error + " information for whole step images")
-            ax2.set_ylabel(p_error + ' error')
-            ax2.set_xlabel('Number of samples per pixels or times')
-            ax2.set_xticks(range(len(images_indices)))
-            ax2.set_xticklabels(list(map(int, images_indices)))
-            ax2.plot(error_data)
-
-            plt.show()
+            plot_name = p_scene + '_' + p_metric + '_' + str(p_step) + '_' + p_mode + '_' + str(p_norm) + '.png'
+            plt.savefig(plot_name, facecolor=ax.get_facecolor())
 
 
 def main():
 def main():
 
 
@@ -260,17 +229,17 @@ def main():
 
 
     if len(sys.argv) <= 1:
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
         print('Run with default parameters...')
-        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
         sys.exit(2)
     try:
         opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:e:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "error=", "ylim="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+            print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
             sys.exit()
         elif o in ("-s", "--scene"):
             p_scene = a
@@ -303,16 +272,13 @@ def main():
         elif o in ("-n", "--norm"):
         elif o in ("-n", "--norm"):
             p_norm = int(a)
             p_norm = int(a)
 
 
-        elif o in ("-e", "--error"):
-            p_error = a
-
         elif o in ("-y", "--ylim"):
         elif o in ("-y", "--ylim"):
             p_ylim = list(map(float, a.split(',')))
             p_ylim = list(map(float, a.split(',')))
 
 
         else:
         else:
             assert False, "unhandled option"
             assert False, "unhandled option"
 
 
-    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim)
+    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_ylim)

 if __name__== "__main__":
     main()
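
Example invocation after this change (the --error option is gone and the figure is now saved as <scene>_<metric>_<step>_<mode>_<norm>.png rather than displayed):

    python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"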

+ 16 - 0
fichiersSVD_light/Cuisine01/test_cut.py

@@ -0,0 +1,16 @@
+from PIL import Image
+import numpy as np
+
+image_path_noisy = 'cuisine01_00400.png'
+image_path_ref = 'cuisine01_01200.png'
+
+image_noisy = np.asarray(Image.open(image_path_noisy))
+image_ref = np.asarray(Image.open(image_path_ref))
+
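+# splice the two renders side by side: left 400 columns from the noisy image,
+# right 400 columns from the reference (the file names suggest 400 vs 1200 samples)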
+first_part = image_noisy[:, 0:400]
+second_part = image_ref[:, 400:800]
+
+final_image = Image.fromarray(np.concatenate((first_part, second_part), axis=1))
+
+final_image.show()
+

BIN
fichiersSVD_light/SdbDroite/sceneSDB_Droite.zip


+ 1 - 1
generateAndTrain_maxwell.sh

@@ -44,7 +44,7 @@ for counter in {0..4}; do
         for mode in {"svd","svdn","svdne"}; do
         for mode in {"svd","svdn","svdne"}; do
             for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
             for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
 
-                FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
 
                 echo $FILENAME
                 echo $FILENAME

+ 1 - 1
generateAndTrain_maxwell_custom.sh

@@ -44,7 +44,7 @@ for counter in {0..4}; do
         for mode in {"svd","svdn","svdne"}; do
         for mode in {"svd","svdn","svdne"}; do
             for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
             for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
 
-                FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
                 CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
 
 

+ 74 - 0
generateAndTrain_maxwell_custom_center.sh

@@ -0,0 +1,74 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of metric information"
+    exit 1
+fi
+
+result_filename="models_info/models_comparisons.csv"
+VECTOR_SIZE=200
+size=$1
+metric=$2
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
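+# slide a window of $size values across the 200 SVD components; start begins at
+# -half so the first window is centered, and the checks below clamp it to [0, 200]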
+half=$(($size/2))
+start=-$half
+for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    if [ "$start" -lt "0" ]; then
+        start=$((0))
+        end=$(($size))
+    fi
+
+    for nb_zones in {4,6,8,10,12}; do
+
+        echo $start $end
+
+        for mode in {"svd","svdn","svdne"}; do
+            for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
+
+                echo $FILENAME
+
+                # only compute if not already done (in case the server goes down)
+                if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+                    echo "${MODEL_NAME} results already generated..."
+                else
+                    python generate_data_model_random_center.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                    #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+                fi
+            done
+        done
+    done
+
+    if [ "$counter" -eq "0" ]; then
+        start=$(($start+50-$half))
+    else
+        start=$(($start+50))
+    fi
+
+done

+ 74 - 0
generateAndTrain_maxwell_custom_split.sh

@@ -0,0 +1,74 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of metric information"
+    exit 1
+fi
+
+result_filename="models_info/models_comparisons.csv"
+VECTOR_SIZE=200
+size=$1
+metric=$2
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
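+# same centered sliding window over the 200 SVD components as in
+# generateAndTrain_maxwell_custom_center.sh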
+half=$(($size/2))
+start=-$half
+for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    if [ "$start" -lt "0" ]; then
+        start=$((0))
+        end=$(($size))
+    fi
+
+    for nb_zones in {4,6,8,10,12}; do
+
+        echo $start $end
+
+        for mode in {"svd","svdn","svdne"}; do
+            for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
+
+                echo $FILENAME
+
+                # only compute if not already done (in case the server goes down)
+                if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+                    echo "${MODEL_NAME} results already generated..."
+                else
+                    python generate_data_model_random_split.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                    #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+                fi
+            done
+        done
+    done
+
+    if [ "$counter" -eq "0" ]; then
+        start=$(($start+50-$half))
+    else
+        start=$(($start+50))
+    fi
+
+done

+ 79 - 93
generate_data_model.py

@@ -7,7 +7,7 @@ Created on Fri Sep 14 21:02:42 2018
 """
 """
 
 
 from __future__ import print_function
 from __future__ import print_function
-import sys, os, getopt
+import sys, os, argparse
 import numpy as np
 import numpy as np
 import random
 import random
 import time
 import time
@@ -21,7 +21,7 @@ from modules.utils import data as dt

 # getting configuration information
 config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
+learned_folder          = cfg.learned_zones_folder
 min_max_filename        = cfg.min_max_filename_extension

 # define all scenes values
@@ -32,6 +32,8 @@ path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename

+renderer_choices        = cfg.renderer_choices
+normalization_choices   = cfg.normalization_choices
 metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 custom_min_max_folder   = cfg.min_max_custom_folder
@@ -140,66 +142,71 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     train_file = open(output_train_filename, 'w')
     test_file = open(output_test_filename, 'w')

-    scenes = os.listdir(path)
+    for id_scene, folder_scene in enumerate(scenes_list):

-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
+        # only take care of maxwell scenes
+        scene_path = os.path.join(path, folder_scene)

-    for id_scene, folder_scene in enumerate(scenes):
+        zones_indices = zones

-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])

-            scene_path = os.path.join(path, folder_scene)
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)

-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')

-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
+        with open(file_learned_path, 'w') as f:
+            for i in _zones:
+                f.write(str(i) + ';')

-                # if custom normalization choices then we use svd values not already normalized
-                if _custom:
-                    data_filename = _metric + "_svd" + generic_output_file_svd
-                else:
-                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+        for id_zone, index_folder in enumerate(zones_indices):

-                data_file_path = os.path.join(zone_path, data_filename)
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str

-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
+            zone_path = os.path.join(scene_path, current_zone_folder)

-                num_lines = len(lines)
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd" + generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd

-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
+            data_file_path = os.path.join(zone_path, data_filename)

-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()

-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
+            num_lines = len(lines)
+
+            lines_indexes = np.arange(num_lines)
+            random.shuffle(lines_indexes)

-                    image_index = int(lines[index].split(';')[0])
-                    percent = counter / num_lines
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)

-                    if image_index % _step == 0:
-                        line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for index in lines_indexes:

-                        if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
-                            train_file.write(line)
-                        else:
-                            test_file.write(line)
+                image_index = int(lines[index].split(';')[0])
+                percent = counter / num_lines

-                    counter += 1
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)

-                f.close()
+                    if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
+                        train_file.write(line)
+                    else:
+                        test_file.write(line)
+
+                counter += 1
+
+            f.close()

     train_file.close()
     test_file.close()
@@ -207,55 +214,34 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes

 def main():

-    p_custom = False
-    p_step      = 1
-    p_renderer  = 'all'
-    p_each      = 1
-
-    if len(sys.argv) <= 1:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "renderer=", "step=", "each=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
-
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-z", "--zones"):
-            if ',' in a:
-                p_zones = list(map(int, a.split(',')))
-            else:
-                p_zones = [a.strip()]
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--step"):
-            p_step = int(a)
-        elif o in ("-e", "--each"):
-            p_each = int(a)
-        elif o in ("-r", "--renderer"):
-            p_renderer = a
-
-            if p_renderer not in cfg.renderer_choices:
-                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate train and test data files from an SVD interval of scenes data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--zones', type=str, help='Zones indices to use for training data set')
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)', default=1.0)
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
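+
+    # example call (mirrors the former getopt help message):
+    # python generate_data_model.py --output data/dataset --interval 0,20 --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename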
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_zones    = list(map(int, args.zones.split(',')))
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
 
 
     # list all possibles choices of renderer
     scenes_list = dt.get_renderer_scenes_names(p_renderer)

+ 385 - 0
generate_data_model_corr_random.py

@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, argparse
+import numpy as np
+import pandas as pd
+import random
+import time
+import json
+import subprocess
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+
+from modules.utils import config as cfg
+from modules.utils import data as dt
+
+# getting configuration information
+config_filename         = cfg.config_filename
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+renderer_choices        = cfg.renderer_choices
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+
+
+def construct_new_line(path_seuil, indices, line, choice, norm):
+
+    # increase indices values by one to avoid label
+    f = lambda x : x + 1
+    indices = f(indices)
+
+    line_data = np.array(line.split(';'))
+    seuil = line_data[0]
+    metrics = line_data[indices]
+    metrics = metrics.astype('float32')
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
+
+    with open(path_seuil, "r") as seuil_file:
+        seuil_learned = int(seuil_file.readline().strip())
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for idx, val in enumerate(metrics):
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _indices, _metric):
+
+    global min_value_interval, max_value_interval
+
+    # increase indices values by one to avoid label
+    f = lambda x : x + 1
+    indices = f(_indices)
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    line_data = np.array(line.split(';'))
+
+                    metrics = line_data[indices]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for id_scene, folder_scene in enumerate(_scenes_list):
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = zones
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, data, _choice, _custom)
+
+                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                        train_file_data.append(line)
+                    else:
+                        test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--n', type=int, help='Number of features wanted')
+    parser.add_argument('--highest', type=int, help='Specify if highest or lowest values are wished', choices=[0, 1])
+    parser.add_argument('--label', type=int, help='Specify if label correlation is used or not', choices=[0, 1])
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
+    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)')
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
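+
+    # example call (hypothetical values, flags as defined above):
+    # python generate_data_model_corr_random.py --output data/corr_dataset --n 30 --highest 1 --label 1 --kind svdne --metric lab --scenes "A, D, G, H" --nb_zones 12 --random 1 --percent 1 --step 10 --renderer maxwell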
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_n        = args.n
+    p_highest  = args.highest
+    p_label    = args.label
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # Get indices to keep from correlation information
+    # compute temp data file to get correlation information
+    temp_filename = 'temp'
+    temp_filename_path = os.path.join(cfg.output_data_folder, temp_filename)
+
+    cmd = ['python', 'generate_data_model_random.py',
+            '--output', temp_filename_path,
+            '--interval', '0, 200',
+            '--kind', p_kind,
+            '--metric', p_metric,
+            '--scenes', args.scenes,
+            '--nb_zones', str(16),
+            '--random', str(int(p_random)),
+            '--percent', str(p_percent),
+            '--step', str(p_step),
+            '--each', str(1),
+            '--renderer', p_renderer,
+            '--custom', temp_filename + min_max_ext]
+
+    subprocess.Popen(cmd).wait()
+
+    temp_data_file_path = temp_filename_path + '.train'
+    df = pd.read_csv(temp_data_file_path, sep=';', header=None)
+
+    indices = []
+
+    # compute correlation matrix from whole data scenes of renderer (using or not label column)
+    if p_label:
+
+        # compute pearson correlation between features and label
+        corr = df.corr()
+
+        features_corr = []
+
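+        # column 0 of the correlation matrix is the label: keep |corr(label, feature)| for each feature row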
+        for id_row, row in enumerate(corr):
+            for id_col, val in enumerate(corr[row]):
+                if id_col == 0 and id_row != 0:
+                    features_corr.append(abs(val))
+
+    else:
+        df = df.drop(df.columns[[0]], axis=1)
+
+        # compute pearson correlation between features using only features
+        corr = df[1:200].corr()
+
+        features_corr = []
+
+        for id_row, row in enumerate(corr):
+            correlation_score = 0
+            for id_col, val in enumerate(corr[row]):
+                if id_col != id_row:
+                    correlation_score += abs(val)
+
+            features_corr.append(correlation_score)
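+            # a high summed |corr| marks a feature that is redundant with the others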
+
+    # find `n` min or max indices to keep
+    if p_highest:
+        indices = utils.get_indices_of_highest_values(features_corr, p_n)
+    else:
+        indices = utils.get_indices_of_lowest_values(features_corr, p_n)
+
+    indices = np.sort(indices)
+
+    # save indices found
+    if not os.path.exists(cfg.correlation_indices_folder):
+        os.makedirs(cfg.correlation_indices_folder)
+
+    indices_file_path = os.path.join(cfg.correlation_indices_folder, p_filename.replace(cfg.output_data_folder + '/', '') + '.csv')
+
+    with open(indices_file_path, 'w') as f:
+        for i in indices:
+            f.write(str(i) + ';')
+
+    # find min max value if necessary to renormalize data from `n` indices found
+    if p_custom:
+        get_min_max_value_interval(scenes_list, indices, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+
+        min_max_current_filename = p_filename.replace(cfg.output_data_folder + '/', '').replace('deep_keras_', '') + min_max_filename
+        min_max_filename_path = os.path.join(min_max_folder_path, min_max_current_filename)
+
+        print(min_max_filename_path)
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, indices, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_custom)
+
+if __name__== "__main__":
+    main()

+ 90 - 105
generate_data_model_random.py

@@ -7,7 +7,7 @@ Created on Fri Sep 14 21:02:42 2018
 """
 """
 
 
 from __future__ import print_function
 from __future__ import print_function
-import sys, os, getopt
+import sys, os, argparse
 import numpy as np
 import numpy as np
 import random
 import random
 import time
 import time
@@ -21,7 +21,7 @@ from modules.utils import data as dt

 # getting configuration information
 config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
+learned_folder          = cfg.learned_zones_folder
 min_max_filename        = cfg.min_max_filename_extension

 # define all scenes values
@@ -33,6 +33,7 @@ path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename

+renderer_choices        = cfg.renderer_choices
 metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 custom_min_max_folder   = cfg.min_max_custom_folder
@@ -146,75 +147,82 @@ def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _s
     if not os.path.exists(output_data_folder):
         os.makedirs(output_data_folder)

-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
     train_file_data = []
     test_file_data  = []

-    for id_scene, folder_scene in enumerate(scenes):
+    for id_scene, folder_scene in enumerate(_scenes_list):

-        # only take care of maxwell scenes
-        if folder_scene in _scenes_list:
+        scene_path = os.path.join(path, folder_scene)

-            scene_path = os.path.join(path, folder_scene)
+        zones_indices = zones

-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)

-            # shuffle list of zones (=> randomly choose zones)
-            # only in random mode
-            if _random:
-                random.shuffle(zones_folder)
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]

-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])

-                # if custom normalization choices then we use svd values not already normalized
-                if _custom:
-                    data_filename = _metric + "_svd"+ generic_output_file_svd
-                else:
-                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)

-                data_file_path = os.path.join(zone_path, data_filename)
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')

-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')

-                num_lines = len(lines)
+        for id_zone, index_folder in enumerate(zones_indices):

-                # randomly shuffle image
-                if _random:
-                    random.shuffle(lines)
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str

-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+            zone_path = os.path.join(scene_path, current_zone_folder)

-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for data in lines:
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd

-                    percent = counter / num_lines
-                    image_index = int(data.split(';')[0])
+            data_file_path = os.path.join(zone_path, data_filename)

-                    if image_index % _step == 0:
-                        line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()

-                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                            train_file_data.append(line)
-                        else:
-                            test_file_data.append(line)
+            num_lines = len(lines)

-                    counter += 1
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])

-                f.close()
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
+
+                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                        train_file_data.append(line)
+                    else:
+                        test_file_data.append(line)
+
+                counter += 1
+
+            f.close()

     train_file = open(output_train_filename, 'w')
     test_file = open(output_test_filename, 'w')
@@ -231,60 +239,37 @@ def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _s

 def main():

-    p_custom    = False
-    p_step      = 1
-    p_renderer  = 'all'
-    p_each      = 1
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 renderer all  --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:r:p:s:e:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "random=", "percent=", "step=", "each=", "renderer=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-
-            if p_kind not in normalization_choices:
-                assert False, "Invalid normalization choice, %s" % normalization_choices
-
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-n", "--nb_zones"):
-            p_nb_zones = int(a)
-        elif o in ("-r", "--random"):
-            p_random = int(a)
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-s", "--step"):
-            p_step = int(a)
-        elif o in ("-e", "--each"):
-            p_each = int(a)
-        elif o in ("-r", "--renderer"):
-            p_renderer = a
-
-            if p_renderer not in cfg.renderer_choices:
-                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate train and test data files using randomly selected zones of scenes data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
+    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)')
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
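+
+    # example call (mirrors the former getopt help message):
+    # python generate_data_model_random.py --output data/dataset --interval 0,20 --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename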
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
 
 
     # list all possibles choices of renderer
     scenes_list = dt.get_renderer_scenes_names(p_renderer)

+ 314 - 0
generate_data_model_random_center.py

@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, argparse
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+
+from modules.utils import config as cfg
+from modules.utils import data as dt
+
+# getting configuration information
+config_filename         = cfg.config_filename
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+renderer_choices        = cfg.renderer_choices
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+abs_gap_data            = 150
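+# a sample is kept only if its image index lies within this gap of the zone's human threshold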
+
+
+def construct_new_line(seuil_learned, interval, line, choice, each, norm):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    metrics = line_data[begin+1:end+1]
+
+    # keep only if modulo result is 0 (keep only each wanted values)
+    metrics = [float(m) for id, m in enumerate(metrics) if id % each == 0]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for idx, val in enumerate(metrics):
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _interval, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for id_scene, folder_scene in enumerate(_scenes_list):
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = zones
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            with open(path_seuil, "r") as seuil_file:
+                seuil_learned = int(seuil_file.readline().strip())
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+
+                    with open(path_seuil, "r") as seuil_file:
+                        seuil_learned = int(seuil_file.readline().strip())
+
+                    gap_threshold = abs(seuil_learned - image_index)
+
+                    # only keep data near to threshold of zone image
+                    if gap_threshold <= abs_gap_data:
+
+                        line = construct_new_line(seuil_learned, _interval, data, _choice, _each, _custom)
+
+                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                            train_file_data.append(line)
+                        else:
+                            test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate train and test data files keeping only samples close to each zone threshold")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
+    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)')
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
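+
+    # example call (hypothetical values, same interface as generate_data_model_random.py):
+    # python generate_data_model_random_center.py --output data/dataset --interval 0,200 --kind svdne --metric lab --scenes "A, D, G" --nb_zones 5 --random 1 --percent 1 --step 10 --renderer maxwell --custom min_max_filename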
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # get scenes from user-selected indices
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
+
+if __name__ == "__main__":
+    main()
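An illustrative invocation of this variant (output name and scene list are hypothetical):

    python generate_data_model_random_center.py --output data/example_center --interval "0,24" --kind svdne --metric lab --scenes "A, D, G, H" --nb_zones 10 --percent 1 --step 10 --random 1 --renderer maxwell

Only images whose index lies within abs_gap_data (100 steps) of the zone threshold are written into the generated .train/.test files.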

+ 313 - 0
generate_data_model_random_split.py

@@ -0,0 +1,313 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, argparse
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+
+from modules.utils import config as cfg
+from modules.utils import data as dt
+
+# getting configuration information
+config_filename         = cfg.config_filename
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+renderer_choices        = cfg.renderer_choices
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+abs_gap_data            = 100
+
+
+def construct_new_line(seuil_learned, interval, line, choice, each, norm):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    metrics = line_data[begin+1:end+1]
+
+    # keep only if modulo result is 0 (keep only each wanted values)
+    metrics = [float(m) for idx, m in enumerate(metrics) if idx % each == 0]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for val in metrics:
+        line += ';' + str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _interval, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only consider scenes from the given list
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # read all lines of the zone data file
+                with open(data_file_path) as f:
+                    lines = f.readlines()
+
+                # update global min / max values over the selected interval
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if '/' not in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for id_scene, folder_scene in enumerate(_scenes_list):
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = zones
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # read all lines of the zone data file
+            with open(data_file_path) as f:
+                lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            with open(path_seuil, "r") as seuil_file:
+                seuil_learned = int(seuil_file.readline().strip())
+
+            counter = 0
+            # browse lines of the zone data file; only data far from the threshold is kept
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+
+                    gap_threshold = abs(seuil_learned - image_index)
+
+                    # only keep data far from the threshold of the zone image
+                    if gap_threshold > abs_gap_data:
+
+                        line = construct_new_line(seuil_learned, _interval, data, _choice, _each, _custom)
+
+                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                            train_file_data.append(line)
+                        else:
+                            test_file_data.append(line)
+
+                counter += 1
+
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate train and test datasets, keeping only samples whose image index lies far from the zone threshold")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
+    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)')
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+
+    # list all possible scenes for the chosen renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # get scenes from user-selected indices
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
+
+if __name__ == "__main__":
+    main()
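This split variant mirrors the center one: with gap_threshold > abs_gap_data it keeps only samples lying more than 100 steps away from the zone threshold. An illustrative invocation (arguments hypothetical):

    python generate_data_model_random_split.py --output data/example_split --interval "0,24" --kind svdne --metric lab --scenes "A, D, G, H" --nb_zones 10 --percent 1 --step 10 --random 1 --renderer maxwell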

+ 7 - 0
generate_metrics_curve.sh

@@ -0,0 +1,7 @@
+for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+
+    python display_svd_data_scene.py --scene D --interval "0, 800" --indices "0, 1200" --metric ${metric} --mode svdne --step 100 --norm 1 --error mse --ylim "0, 0.1"
+
+done
+
+

+ 7 - 2
modules/utils/config.py

@@ -7,13 +7,15 @@ threshold_map_folder            = 'threshold_map'
 models_information_folder       = 'models_info'
 saved_models_folder             = 'saved_models'
 min_max_custom_folder           = 'custom_norm'
+learned_zones_folder            = 'learned_zones'
+correlation_indices_folder      = 'corr_indices'
 
 csv_model_comparisons_filename  = "models_comparisons.csv"
 seuil_expe_filename             = 'seuilExpe'
 min_max_filename_extension      = "_min_max_values"
 config_filename                 = "config"
 
-models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2"]
+models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
 
 # define all scenes values
 renderer_choices                = ['all', 'maxwell', 'igloo', 'cycle']
@@ -33,4 +35,7 @@ cycle_scenes_indices            = ['E', 'I']
 normalization_choices           = ['svd', 'svdn', 'svdne']
 zones_indices                   = np.arange(16)
 
-metric_choices_labels           = ['lab', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2', 'sub_blocks_stats', 'sub_blocks_area', 'sub_blocks_stats_reduced']
+metric_choices_labels           = ['lab', 'mscn', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2', 'sub_blocks_stats', 'sub_blocks_area', 'sub_blocks_stats_reduced', 'sub_blocks_area_normed', 'mscn_var_4', 'mscn_var_16', 'mscn_var_64', 'mscn_var_16_max', 'mscn_var_64_max']
+
+keras_epochs                    = 500
+keras_batch                     = 32
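The two new Keras constants are presumably consumed by the deep_network_keras_svd.py training script added in this release; a minimal sketch of the intended use (model, x_train and y_train are assumed to exist):

    from modules.utils import config as cfg

    model.fit(x_train, y_train,
              epochs=cfg.keras_epochs,      # 500
              batch_size=cfg.keras_batch)   # 32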

+ 66 - 2
modules/utils/data.py

@@ -25,7 +25,7 @@ def get_svd_data(data_type, block):
         block.save(block_file_path)
         data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
 
-    if data_type == 'mscn_revisited':
+    if data_type == 'mscn':
 
         img_mscn_revisited = processing.rgb_to_mscn(block)
 
@@ -38,7 +38,7 @@ def get_svd_data(data_type, block):
         # extract from temp image
         data = metrics.get_SVD_s(img_block)
 
-    if data_type == 'mscn':
+    """if data_type == 'mscn':
 
         img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
         img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
@@ -47,6 +47,7 @@ def get_svd_data(data_type, block):
         img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
 
         data = metrics.get_SVD_s(img_mscn_gray)
+    """
 
     if data_type == 'low_bits_6':
 
@@ -151,9 +152,72 @@ def get_svd_data(data_type, block):
         # convert into numpy array after computing all stats
         data = np.asarray(data)
 
+    if data_type == 'sub_blocks_area_normed':
+
+        block = np.asarray(block)
+        width, height, _ = block.shape
+        sub_width, sub_height = int(width / 8), int(height / 8)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole lab L canal
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+            l_svd_data = utils.normalize_arr(l_svd_data)
+
+            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
+            data.append(area_under_curve)
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+    if data_type == 'mscn_var_4':
+
+        data = _get_mscn_variance(block, (100, 100))
+
+    if data_type == 'mscn_var_16':
+
+        data = _get_mscn_variance(block, (50, 50))
+
+    if data_type == 'mscn_var_64':
+
+        data = _get_mscn_variance(block, (25, 25))
+
+    if data_type == 'mscn_var_16_max':
+
+        data = _get_mscn_variance(block, (50, 50))
+        data = np.asarray(data)
+        size = int(len(data) / 4)
+        indices = data.argsort()[-size:][::-1]
+        data = data[indices]
+
+    if data_type == 'mscn_var_64_max':
+
+        data = _get_mscn_variance(block, (25, 25))
+        data = np.asarray(data)
+        size = int(len(data) / 4)
+        indices = data.argsort()[-size:][::-1]
+        data = data[indices]
 
     return data
 
+def _get_mscn_variance(block, sub_block_size=(50, 50)):
+
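+    # split the block into fixed-size sub-blocks, compute the variance of the
+    # MSCN coefficients of each sub-block, then return the sorted variances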
+    blocks = processing.divide_in_blocks(block, sub_block_size)
+
+    data = []
+
+    for sub_block in blocks:
+        mscn_coefficients = processing.get_mscn_coefficients(sub_block)
+        flat_coeff = mscn_coefficients.flatten()
+        data.append(np.var(flat_coeff))
+
+    return np.sort(data)
+
+
 def get_renderer_scenes_indices(renderer_name):
 def get_renderer_scenes_indices(renderer_name):
 
     if renderer_name not in renderer_choices:
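As an illustration of the new metric types, a feature vector can be extracted from an image block as follows (a sketch; the block path is hypothetical, and mscn_var_16 assumes a 200x200 block split into sixteen 50x50 sub-blocks):

    from PIL import Image
    from modules.utils import data as dt

    block = Image.open('fichiersSVD_light/Cuisine01/zone00/block.png')  # hypothetical path
    features = dt.get_svd_data('mscn_var_16', block)  # 16 sorted MSCN variances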

+ 64 - 42
predict_noisy_image_svd.py

@@ -5,7 +5,9 @@ import numpy as np
 from ipfml import processing, utils
 from PIL import Image
 
-import sys, os, getopt
+import sys, os, argparse, json
+
+from keras.models import model_from_json
 
 from modules.utils import config as cfg
 from modules.utils import data as dt
@@ -19,46 +21,55 @@ custom_min_max_folder = cfg.min_max_custom_folder
 
 def main():
 
-    p_custom = False
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:m:o:c", ["help=", "image=", "interval=", "model=", "metric=", "mode=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.dirname(__file__), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.dirname(__file__), a)
-        elif o in ("-m", "--metric"):
-            p_metric = a
-
-            if not p_metric in metric_choices:
-                assert False, "Unknow metric choice"
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if not p_mode in normalization_choices:
-                assert False, "Mode of normalization not recognized"
-        elif o in ("-c", "--custom"):
-            p_custom = a
+    # getting all params
+    parser = argparse.ArgumentParser(description="Script which detects whether an image is noisy or not using a given model")
 
-        else:
-            assert False, "unhandled option"
+    parser.add_argument('--image', type=str, help='Image path')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_img_file   = args.image
+    p_model_file = args.model
+    p_interval   = list(map(int, args.interval.split(',')))
+    p_mode       = args.mode
+    p_metric     = args.metric
+    p_custom     = args.custom
+
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+
+    if 'corr' in p_model_file:
+        corr_model = True
+
+        indices_corr_path = os.path.join(cfg.correlation_indices_folder, p_model_file.split('/')[1].replace('.json', '').replace('.joblib', '') + '.csv')
 
-    # load of model file
-    model = joblib.load(p_model_file)
+        with open(indices_corr_path, 'r') as f:
+            data_corr_indices = [int(x) for x in f.readline().split(';') if x != '']
+    else:
+        corr_model = False
+
+
+    if kind_model == 'sklearn':
+        # load of model file
+        model = joblib.load(p_model_file)
+
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        metrics=['accuracy'])
 
     # load image
     img = Image.open(p_img_file)
@@ -71,7 +82,10 @@ def main():
     # check if custom min max file is used
     if p_custom:
 
-        test_data = data[begin:end]
+        if corr_model:
+            test_data = data[data_corr_indices]
+        else:
+            test_data = data[begin:end]
 
         if p_mode == 'svdne':
 
@@ -110,11 +124,19 @@ def main():
         else:
             l_values = data
 
-        test_data = l_values[begin:end]
+        if corr_model:
+            test_data = l_values[data_corr_indices]
+        else:
+            test_data = l_values[begin:end]
 
 
     # get prediction of model
-    prediction = model.predict([test_data])[0]
+    if kind_model == 'sklearn':
+        prediction = model.predict([test_data])[0]
+
+    if kind_model == 'keras':
+        test_data = np.asarray(test_data).reshape(1, len(test_data), 1)
+        prediction = model.predict_classes([test_data])[0][0]
 
     # output expected by other scripts
     print(prediction)
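An illustrative call (paths hypothetical); the script prints 1 for a noisy image and 0 otherwise, so other scripts can read the result from stdout:

    python predict_noisy_image_svd.py --image fichiersSVD_light/Cuisine01/zone00/block.png --interval "0,24" --model saved_models/deep_keras_N24_B0_E24_nb_zones_10_lab_svdne.json --mode svdne --metric lab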

+ 44 - 5
prediction_scene.py

@@ -4,8 +4,15 @@ import numpy as np
 
 import pandas as pd
 from sklearn.metrics import accuracy_score
+from keras.models import Sequential
+from keras.layers import Conv1D, MaxPooling1D
+from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras import backend as K
+from keras.models import model_from_json
+from keras.wrappers.scikit_learn import KerasClassifier
 
 import sys, os, getopt
+import json
 
 from modules.utils import config as cfg
 
@@ -38,6 +45,14 @@ def main():
         else:
             assert False, "unhandled option"
 
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+        model_ext = '.joblib'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+        model_ext = '.json'
+
     if not os.path.exists(output_model_folder):
         os.makedirs(output_model_folder)
 
@@ -55,11 +70,36 @@ def main():
     y_not_noisy_dataset = not_noisy_dataset.ix[:, 0]
     x_not_noisy_dataset = not_noisy_dataset.ix[:, 1:]
 
-    model = joblib.load(p_model_file)
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
+                  metrics=['accuracy'])
+
+        _, vector_size = np.array(x_dataset).shape
+
+        # reshape all data
+        x_dataset = np.array(x_dataset).reshape(len(x_dataset), vector_size, 1)
+        x_noisy_dataset = np.array(x_noisy_dataset).reshape(len(x_noisy_dataset), vector_size, 1)
+        x_not_noisy_dataset = np.array(x_not_noisy_dataset).reshape(len(x_not_noisy_dataset), vector_size, 1)
 
 
-    y_noisy_pred = model.predict(x_noisy_dataset)
-    y_not_noisy_pred = model.predict(x_not_noisy_dataset)
+
+    if kind_model == 'sklearn':
+        model = joblib.load(p_model_file)
+
+    if kind_model == 'keras':
+        y_pred = model.predict_classes(x_dataset)
+        y_noisy_pred = model.predict_classes(x_noisy_dataset)
+        y_not_noisy_pred = model.predict_classes(x_not_noisy_dataset)
+
+    if kind_model == 'sklearn':
+        y_pred = model.predict(x_dataset)
+        y_noisy_pred = model.predict(x_noisy_dataset)
+        y_not_noisy_pred = model.predict(x_not_noisy_dataset)
 
 
     accuracy_global = accuracy_score(y_dataset, y_pred)
     accuracy_noisy = accuracy_score(y_noisy_dataset, y_noisy_pred)
             for prediction in y_pred:
             for prediction in y_pred:
                 f.write(str(prediction) + '\n')
 
-
 if __name__== "__main__":
     main()
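Note on the reshapes above: Keras Conv1D layers consume tensors of shape (samples, steps, channels), so each feature vector of length vector_size becomes one row of an (n, vector_size, 1) array; for a single vector the equivalent sketch is:

    import numpy as np
    x = np.asarray(vector).reshape(1, len(vector), 1)  # one sample, one channel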

+ 7 - 0
runAll_display_data_scene.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+
+for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+    for scene in {"A","D","G","H"}; do
+        python display_svd_data_scene.py --scene ${scene} --interval "0,800" --indices "0, 2000" --metric ${metric} --mode svdne --step 100 --norm 1 --ylim "0, 0.01"
+    done
+done

+ 1 - 1
runAll_maxwell.sh

@@ -18,7 +18,7 @@ fi
 
 for size in {"4","8","16","26","32","40"}; do
 
-    for metric in {"lab","mscn","mscn_revisited","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+    for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
         bash generateAndTrain_maxwell.sh ${size} ${metric}
     done
 done

+ 1 - 1
runAll_maxwell_area.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 52 - 0
runAll_maxwell_area_normed.sh

@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_area_normed"
+start_index=0
+end_index=16
+number=16
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (skip models whose results are already saved, e.g. after a server crash)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 56 - 0
runAll_maxwell_corr_custom.sh

@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+
+fi
+
+start_index=0
+end_index=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+                    for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                        FILENAME="data/${model}_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                        MODEL_NAME="${model}_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+
+                        echo $FILENAME
+
+                        # only compute if necessary (skip models whose results are already saved, e.g. after a server crash)
+                        if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                            echo "${MODEL_NAME} results already generated..."
+                        else
+                            python generate_data_model_corr_random.py --output ${FILENAME} --n ${size} --highest ${highest} --label ${label} --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom 1
+                            python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                            # --interval is reused here even though the value is a vector size, not a real interval
+                            python save_model_result_in_md_maxwell.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+                        fi
+                    done
+                done
+            done
+        done
+    done
+done
+

+ 1 - 1
runAll_maxwell_custom.sh

@@ -18,7 +18,7 @@ fi
 
 for size in {"4","8","16","26","32","40"}; do
 
-    for metric in {"lab","mscn","mscn_revisited","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+    for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
         bash generateAndTrain_maxwell_custom.sh ${size} ${metric}
     done
 done

+ 24 - 0
runAll_maxwell_custom_center.sh

@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+for size in {"4","8","16","26","32","40"}; do
+
+    for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+        bash generateAndTrain_maxwell_custom_center.sh ${size} ${metric}
+    done
+done

+ 24 - 0
runAll_maxwell_custom_split.sh

@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+for size in {"4","8","16","26","32","40"}; do
+
+    for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+        bash generateAndTrain_maxwell_custom_split.sh ${size} ${metric}
+    done
+done

+ 55 - 0
runAll_maxwell_keras.sh

@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+
+fi
+
+start_index=0
+end_index=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
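+# lookup table giving the feature vector length of each metric
+# (e.g. ${metrics_size["sub_blocks_area"]} expands to "16")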
+declare -A metrics_size
+metrics_size=( ["sub_blocks_stats"]="24" ["sub_blocks_stats_reduced"]="20" ["sub_blocks_area"]="16" ["sub_blocks_area_normed"]="20")
+
+for metric in {"sub_blocks_stats","sub_blocks_stats_reduced","sub_blocks_area","sub_blocks_area_normed"}; do
+    for nb_zones in {4,6,8,10,12}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            end_index=${metrics_size[${metric}]}
+
+            FILENAME="data/deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (skip models whose results are already saved, e.g. after a server crash)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                echo "test"
+                #python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                #python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${end_index}
+
+                #python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+

+ 54 - 0
runAll_maxwell_keras_corr.sh

@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+
+fi
+
+start_index=0
+end_index=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+
+                    FILENAME="data/deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                    MODEL_NAME="deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+
+                    echo $FILENAME
+
+                    # only compute if necessary (skip models whose results are already saved, e.g. after a server crash)
+                    if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                        echo "${MODEL_NAME} results already generated..."
+                    else
+                        python generate_data_model_corr_random.py --output ${FILENAME} --n ${size} --highest ${highest} --label ${label} --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                        python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${size}
+
+                        # --interval is reused here even though the value is a vector size, not a real interval
+                        python save_model_result_in_md_maxwell.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+                    fi
+                done
+            done
+        done
+    done
+done
+

+ 54 - 0
runAll_maxwell_keras_corr_custom.sh

@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+
+fi
+
+start_index=0
+end_index=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+
+                    FILENAME="data/deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                    MODEL_NAME="deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+
+                    echo $FILENAME
+
+                    # only compute if necessary (skip models whose results are already saved, e.g. after a server crash)
+                    if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                        echo "${MODEL_NAME} results already generated..."
+                    else
+                        python generate_data_model_corr_random.py --output ${FILENAME} --n ${size} --highest ${highest} --label ${label} --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom 1
+                        python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${size}
+
+                        # --interval is reused here even though the value is a vector size, not a real interval
+                        python save_model_result_in_md_maxwell.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+                    fi
+                done
+            done
+        done
+    done
+done
+

+ 56 - 0
runAll_maxwell_mscn_var.sh

@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+start_index=0
+end_index=4
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+declare -A metrics_size
+metrics_size=( ["mscn_var_4"]=4 ["mscn_var_16"]=16 ["mscn_var_64"]=64 ["mscn_var_16_max"]=4 ["mscn_var_64_max"]=16)
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for metric in {"mscn_var_4","mscn_var_16","mscn_var_64","mscn_var_16_max","mscn_var_64_max"}; do
+            for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                end_index=${metrics_size[${metric}]}
+
+                FILENAME="data/${model}_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+                MODEL_NAME="${model}_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+                echo $FILENAME
+
+                # only compute if necessary (skip models whose results are already saved, e.g. after a server crash)
+                if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                    echo "${MODEL_NAME} results already generated..."
+                else
+                    python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                    python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                    python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+                fi
+            done
+        done
+    done
+done
+
+

+ 1 - 1
runAll_maxwell_sub_blocks_stats.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 1 - 1
runAll_maxwell_sub_blocks_stats_reduced.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 1 - 1
run_maxwell_simulation.sh

@@ -31,7 +31,7 @@ for size in {"4","8","16","26","32","40"}; do
                  for mode in {"svd","svdn","svdne"}; do
                      for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-                        FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                        FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
                         if grep -xq "${MODEL_NAME}" "${simulate_models}"; then

+ 43 - 0
run_maxwell_simulation_corr_custom.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models.csv"
+
+start_index=0
+size=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+                    for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+                        FILENAME="data/${model}_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                        MODEL_NAME="${model}_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                        CUSTOM_MIN_MAX_FILENAME="${model}_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}_min_max_values"
+
+                        echo ${MODEL_NAME}
+
+                        if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
+                            echo "Run simulation for model ${MODEL_NAME}"
+
+                            python generate_data_model_corr_random.py --output ${FILENAME} --n ${size} --highest ${highest} --label ${label} --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom 1
+
+                            python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                            python predict_seuil_expe_maxwell_curve.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                            python save_model_result_in_md_maxwell.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+
+                        fi
+                    done
+                done
+            done
+        done
+    done
+done

+ 1 - 1
run_maxwell_simulation_custom.sh

@@ -31,7 +31,7 @@ for size in {"4","8","16","26","32","40"}; do
                  for mode in {"svd","svdn","svdne"}; do
                      for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-                        FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                        FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
 

+ 43 - 0
run_maxwell_simulation_keras_corr_custom.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models_keras_corr.csv"
+
+start_index=0
+size=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+
+                    FILENAME="data/deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                    MODEL_NAME="deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+
+
+                    CUSTOM_MIN_MAX_FILENAME="N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}_min_max_values"
+
+                    echo ${MODEL_NAME}
+
+                    if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
+                        echo "Run simulation for model ${MODEL_NAME}"
+
+                        python generate_data_model_corr_random.py --output ${FILENAME} --n ${size} --highest ${highest} --label ${label} --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom 1
+
+                        python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${size}
+
+                        python predict_seuil_expe_maxwell_curve.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                        python save_model_result_in_md_maxwell.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+
+                    fi
+                done
+            done
+        done
+    done
+done

+ 38 - 0
run_maxwell_simulation_keras_custom.sh

@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models_keras.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+start_index=0
+declare -A metrics_size
+metrics_size=( ["sub_blocks_stats"]=24 ["sub_blocks_stats_reduced"]=20 ["sub_blocks_area"]=16 ["sub_blocks_area_normed"]=20)
+
+for metric in {"sub_blocks_stats","sub_blocks_stats_reduced","sub_blocks_area","sub_blocks_area_normed"}; do
+    for nb_zones in {4,6,8,10,12}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            end_index=${metrics_size[${metric}]}
+            FILENAME="data/deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            CUSTOM_MIN_MAX_FILENAME="N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
+
+            if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
+                echo "Run simulation for model ${MODEL_NAME}"
+
+                # by default regenerate model
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${end_index}
+
+                python predict_seuil_expe_maxwell_curve.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+
+            fi
+        done
+    done
+done
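
The metrics_size associative array above maps each metric to its SVD feature-vector length; that length fixes the end of the --interval handed downstream. A minimal Python sketch of the same mapping, assuming the sizes declared in the script:

    # Sketch: metric -> SVD feature-vector length, as declared above.
    metrics_size = {
        'sub_blocks_stats': 24,
        'sub_blocks_stats_reduced': 20,
        'sub_blocks_area': 16,
        'sub_blocks_area_normed': 20,
    }

    start_index = 0
    for metric, end_index in metrics_size.items():
        # interval string passed to generate_data_model_random.py and the predict scripts
        print(metric, '->', '{},{}'.format(start_index, end_index))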

+ 77 - 17
save_model_result_in_md_maxwell.py

@@ -2,8 +2,16 @@ from sklearn.utils import shuffle
 from sklearn.externals import joblib
 from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
 from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import StratifiedKFold
 from sklearn.model_selection import train_test_split
 
+from keras.models import Sequential
+from keras.layers import Conv1D, MaxPooling1D
+from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras.wrappers.scikit_learn import KerasClassifier
+from keras import backend as K
+from keras.models import model_from_json
+
 import numpy as np
 import pandas as pd
 
@@ -13,6 +21,7 @@ from PIL import Image
 import sys, os, getopt
 import subprocess
 import time
+import json
 
 from modules.utils import config as cfg
 
@@ -30,6 +39,9 @@ current_dirpath = os.getcwd()
 
 def main():
 
+    kind_model = 'keras'
+    model_ext = ''
+
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
         print('python save_model_result_in_md.py --interval "0,20" --model path/to/xxxx.joblib --mode ["svd", "svdn", "svdne"] --metric ["lab", "mscn"]')
@@ -58,9 +70,7 @@ def main():
         else:
             assert False, "unhandled option"
 
-
     # call model and get global result in scenes
-
     begin, end = p_interval
 
     bash_cmd = "bash testModelByScene_maxwell.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_metric + "'"
@@ -79,7 +89,16 @@ def main():
         os.makedirs(markdowns_folder)
 
     # get model name to construct model
-    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace('.joblib', '.md'))
+
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+        model_ext = '.joblib'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+        model_ext = '.json'
+
+    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace(model_ext, '.md'))
 
     with open(md_model_path, 'w') as f:
         f.write(output.decode("utf-8"))
@@ -109,7 +128,7 @@ def main():
         f.close()
 
     # Keep model information to compare
-    current_model_name = p_model_file.split('/')[-1].replace('.joblib', '')
+    current_model_name = p_model_file.split('/')[-1].replace(model_ext, '')
 
     # Prepare writing in .csv file
     output_final_file_path = os.path.join(markdowns_folder, final_csv_model_comparisons)
@@ -119,8 +138,11 @@ def main():
     # reconstruct data filename
     for name in models_name:
         if name in current_model_name:
-            current_data_file_path = os.path.join('data', current_model_name.replace(name, 'data_maxwell'))
+            data_filename = current_model_name
+            current_data_file_path = os.path.join('data', data_filename)
 
+    print("Current data file ")
+    print(current_data_file_path)
     model_scores = []
 
     ########################
@@ -163,13 +185,35 @@ def main():
     # 2. Getting model
     #######################
 
-    model = joblib.load(p_model_file)
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        metrics=['accuracy'])
+
+        # reshape all input data
+        x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), end, 1)
+        x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), end, 1)
+
+
+    if kind_model == 'sklearn':
+        model = joblib.load(p_model_file)
 
     #######################
     # 3. Fit model : use of cross validation to fit model
     #######################
-    model.fit(x_dataset_train, y_dataset_train)
-    val_scores = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
+
+    if kind_model == 'keras':
+        model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
+
+    if kind_model == 'sklearn':
+        model.fit(x_dataset_train, y_dataset_train)
+
+        # mean CV accuracy (a bare cross_val_score array cannot be appended as a single score below)
+        train_accuracy = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5).mean()
 
     ######################
     # 4. Test : Validation and test dataset from .test dataset
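
For reference, a standalone sketch of the JSON-plus-weights load path used above. It assumes, as the code does, that the .json file stores the output of model.to_json() as a JSON string (so json.load returns a string) and that the weights sit in a .h5 file with the same basename:

    import json
    from keras.models import model_from_json

    def load_keras_model(json_path):
        # Rebuild the architecture from its JSON description...
        with open(json_path, 'r') as f:
            model = model_from_json(json.load(f))
        # ...then attach the trained weights stored alongside it.
        model.load_weights(json_path.replace('.json', '.h5'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model
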
@@ -187,14 +231,23 @@ def main():
 
     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
 
-    y_test_model = model.predict(X_test)
-    y_val_model = model.predict(X_val)
+    if kind_model == 'keras':
+        y_test_model = model.predict_classes(X_test)
+        y_val_model = model.predict_classes(X_val)
+
+        y_train_model = model.predict_classes(x_dataset_train)
+
+        train_accuracy = accuracy_score(y_dataset_train, y_train_model)
+
+    if kind_model == 'sklearn':
+        y_test_model = model.predict(X_test)
+        y_val_model = model.predict(X_val)
+
+        y_train_model = model.predict(x_dataset_train)
 
     val_accuracy = accuracy_score(y_val, y_val_model)
     test_accuracy = accuracy_score(y_test, y_test_model)
 
-    y_train_model = model.predict(x_dataset_train)
-
     train_f1 = f1_score(y_dataset_train, y_train_model)
     train_recall = recall_score(y_dataset_train, y_train_model)
     train_roc_auc = roc_auc_score(y_dataset_train, y_train_model)
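
The predict_classes calls above are Keras Sequential helpers; for the single sigmoid output compiled earlier with binary_crossentropy, they reduce to thresholding the predicted probability at 0.5. A sketch of that equivalence, assuming the binary setup and reusing names from the surrounding hunk:

    # Sketch: predict_classes on a binary sigmoid output is a 0.5 threshold.
    probas = model.predict(X_test)
    y_test_model = (probas > 0.5).astype('int32').ravel()
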
@@ -207,11 +260,18 @@ def main():
     test_recall = recall_score(y_test, y_test_model)
     test_roc_auc = roc_auc_score(y_test, y_test_model)
 
-    # stats of all dataset
-    all_x_data = pd.concat([x_dataset_train, X_test, X_val])
-    all_y_data = pd.concat([y_dataset_train, y_test, y_val])
+    if kind_model == 'keras':
+        # stats of all dataset
+        all_x_data = np.concatenate([x_dataset_train, X_test, X_val])
+        all_y_data = np.concatenate([y_dataset_train, y_test, y_val])
+        all_y_model = model.predict_classes(all_x_data)
+
+    if kind_model == 'sklearn':
+        # stats of all dataset
+        all_x_data = pd.concat([x_dataset_train, X_test, X_val])
+        all_y_data = pd.concat([y_dataset_train, y_test, y_val])
+        all_y_model = model.predict(all_x_data)
 
-    all_y_model = model.predict(all_x_data)
     all_accuracy = accuracy_score(all_y_data, all_y_model)
     all_f1_score = f1_score(all_y_data, all_y_model)
     all_recall_score = recall_score(all_y_data, all_y_model)
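
The keras/sklearn split above exists because the Keras branch reshaped its inputs to 3-D numpy arrays of shape (samples, end, 1), which pd.concat cannot stack; np.concatenate joins them along the sample axis instead. A toy sketch, with shapes chosen only for illustration:

    import numpy as np

    x_train = np.zeros((100, 20, 1))   # e.g. 100 samples, end=20 features, 1 channel
    x_test = np.zeros((30, 20, 1))
    x_val = np.zeros((30, 20, 1))

    all_x = np.concatenate([x_train, x_test, x_val])
    print(all_x.shape)  # (160, 20, 1)
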
@@ -229,7 +289,7 @@ def main():
     model_scores.append(test_set_size / total_samples)
 
     # add of scores
-    model_scores.append(val_scores.mean())
+    model_scores.append(train_accuracy)
     model_scores.append(val_accuracy)
     model_scores.append(test_accuracy)
     model_scores.append(all_accuracy)

+ 5 - 0
simulate_models.csv

@@ -0,0 +1,5 @@
+ensemble_model_N40_B0_E40_nb_zones_10_lab_svdne_corr_L0_H1
+ensemble_model_N35_B0_E35_nb_zones_12_lab_svdne_corr_L0_H1
+ensemble_model_N40_B0_E40_nb_zones_12_lab_svdne_corr_L0_H1
+svm_model_N25_B0_E25_nb_zones_4_lab_svdne_corr_L0_H1
+ensemble_model_N40_B0_E40_nb_zones_8_lab_svdne_corr_L0_H1

+ 5 - 0
simulate_models_keras.csv

@@ -0,0 +1,5 @@
+deep_keras_N35_B0_E35_nb_zones_10_lab_svd_corr_L0_H1
+deep_keras_N25_B0_E25_nb_zones_10_lab_svdne_corr_L0_H1
+deep_keras_N30_B0_E30_nb_zones_6_lab_svdne_corr_L0_H1
+deep_keras_N5_B0_E5_nb_zones_12_lab_svdn_corr_L1_H1
+deep_keras_N25_B0_E25_nb_zones_6_lab_svdne_corr_L0_H1

+ 5 - 0
simulate_models_keras_corr.csv

@@ -0,0 +1,5 @@
+deep_keras_N35_B0_E35_nb_zones_10_lab_svd_corr_L0_H1
+deep_keras_N25_B0_E25_nb_zones_10_lab_svdne_corr_L0_H1
+deep_keras_N30_B0_E30_nb_zones_6_lab_svdne_corr_L0_H1
+deep_keras_N5_B0_E5_nb_zones_12_lab_svdn_corr_L1_H1
+deep_keras_N25_B0_E25_nb_zones_6_lab_svdne_corr_L0_H1
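
These entries follow the naming template built by the scripts above (deep_keras_N{size}_B{begin}_E{end}_nb_zones_{zones}_{metric}_{mode}_corr_L{label}_H{highest}). A hypothetical helper, not part of the repository, for pulling the parameters back out of such a name:

    import re

    # Sketch: parse a model name produced by the simulation scripts above.
    pattern = re.compile(
        r'N(?P<n>\d+)_B(?P<begin>\d+)_E(?P<end>\d+)_nb_zones_(?P<zones>\d+)'
        r'_(?P<metric>\w+?)_(?P<mode>svdn?e?)_corr_L(?P<label>\d)_H(?P<highest>\d)')

    m = pattern.search('deep_keras_N35_B0_E35_nb_zones_10_lab_svd_corr_L0_H1')
    if m:
        print(m.groupdict())
        # {'n': '35', 'begin': '0', 'end': '35', 'zones': '10',
        #  'metric': 'lab', 'mode': 'svd', 'label': '0', 'highest': '1'}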