Browse Source

New models creation

Jerome Buisine 1 year ago
parent
commit
5911c7da90

+ 3 - 0
.gitignore

@@ -1,5 +1,8 @@
 # project data
 data
+svm_data
+
+fichiersSVD
 .python-version
 __pycache__
 

+ 84 - 0
ensemble_model_train.py

@@ -0,0 +1,84 @@
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
+
+import sklearn.svm as svm
+from sklearn.externals import joblib
+
+import numpy as np
+
+
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+import sys, os, getopt
+
+output_model_folder = './saved_models/'
+
+def get_best_model(X_train, y_train):
+    parameters = {'kernel':['rbf'], 'C': np.arange(1, 20)}
+    svc = svm.SVC(gamma="scale")
+    clf = GridSearchCV(svc, parameters, cv=5, scoring='accuracy', verbose=10)
+
+    clf.fit(X_train, y_train)
+
+    model = clf.best_estimator_
+
+    return model
+
+
+def main():
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python smv_model_train.py --data xxxx --output xxxx')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hd:o:", ["help=", "data=", "output="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python smv_model_train.py --data xxxx --output xxxx')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python smv_model_train.py --data xxxx --output xxxx')
+            sys.exit()
+        elif o in ("-d", "--data"):
+            p_data_file = a
+        elif o in ("-o", "--output"):
+            p_output = a
+        else:
+            assert False, "unhandled option"
+
+    if not os.path.exists(output_model_folder):
+        os.makedirs(output_model_folder)
+
+    # get and split data
+    dataset = pd.read_csv(p_data_file, header=None, sep=";")
+
+    y_dataset = dataset.ix[:,0]
+    x_dataset = dataset.ix[:,1:]
+
+    X_train, X_test, y_train, y_test = train_test_split(x_dataset, y_dataset, test_size=0.4, random_state=42)
+
+    svm_model = get_best_model(X_train, y_train)
+
+    lr_model = LogisticRegression(solver='lbfgs', multi_class='multinomial', random_state=1)
+    rf_model = RandomForestClassifier(n_estimators=50, random_state=1)
+
+    ensemble_model = VotingClassifier(estimators=[
+       ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)],
+       voting='soft', weights=[2,1,1],
+       flatten_transform=True)
+
+    ensemble_model.fit(X_train, y_train)
+
+    y_pred = ensemble_model.predict(X_test)
+
+    print("Accuracy found %s " % str(accuracy_score(y_test, y_pred)))
+
+    joblib.dump(ensemble_model, output_model_folder + p_output + '.joblib') 
+
+if __name__== "__main__":
+    main()

+ 43 - 0
generateAndTrainSVM.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+# selection of six scenes
+scenes="A, B, C, D, E, G"
+
+for size in {"4","8","16","26","32","40"}; do
+
+  start=0
+  for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    for zones in {"1, 3, 7, 9","0, 2, 7, 8, 9","2, 6, 8, 10, 13, 15","1, 2, 4, 7, 9, 10, 13, 15"}; do
+
+        zones_str="${zones//, /-}"
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_zones${zones_str}"
+
+            echo $FILENAME
+            python generate_data_svm.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scenes}" --zones "${zones}" --percent 1 --sep : --rowindex 1
+            ./apprentissage.sh -log2c -20,20,1 -log2g -20,20,1 ${FILENAME}.train &
+
+        done
+    done
+
+    start=$(($start+50))
+  done
+
+done

+ 41 - 0
generateAndTrainSVM_random.sh

@@ -0,0 +1,41 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+# selection of six scenes
+scenes="A, B, C, D, E, G"
+
+for size in {"4","8","16","26","32","40"}; do
+
+  start=0
+  for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    for nb_zones in {3,4,5,6,7,8,9,10}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_random"
+
+            echo $FILENAME
+            python generate_data_svm_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep : --rowindex 1
+            ./apprentissage.sh -log2c -20,20,1 -log2g -20,20,1 ${FILENAME}.train &
+
+        done
+    done
+
+    start=$(($start+50))
+  done
+
+done

+ 83 - 26
generate_data.py

@@ -7,22 +7,24 @@ Created on Fri Sep 14 21:02:42 2018
 """
 
 from __future__ import print_function
-import sys, os
+import sys, os, getopt
 import numpy as np
 import random
 import time
+import json
 
 config_filename   = "config"
 zone_folder       = "zone"
 min_max_filename  = "min_max_values"
 output_file_svd   = "SVD_LAB_test_im6.csv"
 output_file_svdn  = "SVDN_LAB_test_im6.csv"
-output_file_svdne = "SVDNE_LAB_test_im6.csv" 
+output_file_svdne = "SVDNE_LAB_test_im6.csv"
 
 # define all scenes values
-scenes = ['Appart1opt02', 'Bureau1', 'Cendrier', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
+scenes = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
+scenes_indexes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
 choices = ['svd', 'svdn', 'svdne']
-path = './data'
+path = './fichiersSVD'
 zones = np.arange(16)
 file_choice = [output_file_svd, output_file_svdn, output_file_svdne]
 seuil_expe_filename = 'seuilExpe'
@@ -34,7 +36,7 @@ def generate_data_svd_lab():
     @return nothing
     """
 
-    # TODO : 
+    # TODO :
     # - parcourir chaque dossier de scene
     scenes = os.listdir(path)
 
@@ -49,7 +51,7 @@ def generate_data_svd_lab():
             end_index_image = config_file.readline().strip()
             step_counter = int(config_file.readline().strip())
 
-        
+
         current_counter_index = int(start_index_image)
         end_counter_index = int(start_index_image)
 
@@ -61,32 +63,44 @@ def generate_data_svd_lab():
     # - récupérer les informations des fichiers de configurations
     # - création des fichiers de sortie SVD, SVDE, SVDNE
 
-def construct_new_line(path_seuil, interval, line, sep):
+def construct_new_line(path_seuil, interval, line, sep, index):
     begin, end = interval
 
     line_data = line.split(';')
     seuil = line_data[0]
-    metrics = line_data[begin+1:end]
+    metrics = line_data[begin+1:end+1]
 
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
-       
+
     if seuil_learned > int(seuil):
         line = '0'
     else:
         line = '1'
 
     for idx, val in enumerate(metrics):
-        line += " " + str(idx + 1) + ":" + val
+        if index:
+            line += " " + str(idx + 1)
+        line += sep
+        line += val
     line += '\n'
-    
+
     return line
 
-def generate_data_svm(_filename, _interval, _choice, _scenes = scenes, _zones = zones, _percent = 1, _sep=':'):
+def generate_data_svm(_filename, _interval, _choice, _scenes = scenes, _zones = zones, _percent = 1, _sep=':', _index=True):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
 
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    output_folder = output_train_filename.split('/')[0]
+
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
     train_file = open(output_train_filename, 'w')
     test_file = open(output_test_filename, 'w')
 
@@ -94,8 +108,7 @@ def generate_data_svm(_filename, _interval, _choice, _scenes = scenes, _zones =
 
     for id_scene, folder_scene in enumerate(scenes):
         scene_path = path + "/" + folder_scene
-        
-        print("Current path scene : " + scene_path)
+
         zones_folder = []
         # create zones list
         for index in zones:
@@ -105,19 +118,14 @@ def generate_data_svm(_filename, _interval, _choice, _scenes = scenes, _zones =
             zones_folder.append("zone"+index_str)
 
         for id_zone, zone_folder in enumerate(zones_folder):
-            print(zone_folder)
             zone_path = scene_path + "/" + zone_folder
             data_filename = file_choice[choices.index(_choice)]
             data_file_path = zone_path + "/" + data_filename
-            print(data_file_path)
-
-            print(id_zone in _zones)
 
              # getting number of line and read randomly lines
-            f = open(data_file_path)       
+            f = open(data_file_path)
             lines = f.readlines()
-            #num_lines = sum(1 for line in open(data_file_path))
-            
+
             num_lines = len(lines)
 
             lines_indexes = np.arange(num_lines)
@@ -125,26 +133,75 @@ def generate_data_svm(_filename, _interval, _choice, _scenes = scenes, _zones =
 
             path_seuil = zone_path + "/" + seuil_expe_filename
 
+            counter = 0
             # check if user select current scene and zone to be part of training data set
             for index in lines_indexes:
-                line = construct_new_line(path_seuil, _interval, lines[index], _sep)
+                line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
 
-                if id_zone in _zones and folder_scene in _scenes:
+                percent = counter / num_lines
+                
+                if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
                     train_file.write(line)
                 else:
                     test_file.write(line)
 
+                counter += 1
+
             f.close()
 
     train_file.close()
     test_file.close()
-                
-
 
 def main():
 
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python generate_data_svm.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:", ["help=", "output=", "interval=", "kind=", "scenes=", "zones=", "percent=", "sep=", "rowindex="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python generate_data_svm.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python generate_data_svm.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+            sys.exit()
+        elif o in ("-o", "--output"):
+            p_filename = a
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+        elif o in ("-k", "--kind"):
+            p_kind = a
+        elif o in ("-s", "--scenes"):
+            p_scenes = a.split(',')
+        elif o in ("-z", "--zones"):
+            p_zones = list(map(int, a.split(',')))
+        elif o in ("-p", "--percent"):
+            p_percent = float(a)
+        elif o == "--sep":
+            p_sep = a
+        elif o in ("-r", "--rowindex"):
+            if int(a) == 1:
+                p_rowindex = True
+            else:
+                p_rowindex = False
+        else:
+            assert False, "unhandled option"
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indexes.index(scene_id.strip())
+        scenes_selected.append(scenes[index])
+
+    for scene in scenes_selected:
+        print(scene)
+
     # create database using img folder (generate first time only)
-    generate_data_svm('test', [20, 100], 'svdne', _scenes=['Appart1opt02', 'Bureau1', 'Cendrier', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre'], _zones=[2, 3, 7, 8, 9, 10, 15, 0])
+    generate_data_svm(p_filename, p_interval, p_kind, scenes_selected, p_zones, p_percent, p_sep, p_rowindex)
 
 if __name__== "__main__":
     main()

+ 212 - 0
generate_data_svm_random.py

@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+import numpy as np
+import random
+import time
+import json
+
+config_filename   = "config"
+zone_folder       = "zone"
+min_max_filename  = "min_max_values"
+output_file_svd   = "SVD_LAB_test_im6.csv"
+output_file_svdn  = "SVDN_LAB_test_im6.csv"
+output_file_svdne = "SVDNE_LAB_test_im6.csv"
+
+# define all scenes values
+scenes = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
+scenes_indexes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
+choices = ['svd', 'svdn', 'svdne']
+path = './fichiersSVD'
+zones = np.arange(16)
+file_choice = [output_file_svd, output_file_svdn, output_file_svdne]
+seuil_expe_filename = 'seuilExpe'
+
+def generate_data_svd_lab():
+    """
+    @brief Method which generates all .csv files from scenes photos
+    @param path - path of scenes folder information
+    @return nothing
+    """
+
+    # TODO :
+    # - parcourir chaque dossier de scene
+    scenes = os.listdir(path)
+
+    for folder_scene in scenes:
+
+        folder_path = path + "/" + folder_scene
+
+        with open(folder_path + "/" + config_filename, "r") as config_file:
+            last_image_name = config_file.readline().strip()
+            prefix_image_name = config_file.readline().strip()
+            start_index_image = config_file.readline().strip()
+            end_index_image = config_file.readline().strip()
+            step_counter = int(config_file.readline().strip())
+
+
+        current_counter_index = int(start_index_image)
+        end_counter_index = int(start_index_image)
+
+        print(current_counter_index)
+        while(current_counter_index <= int(end_index_image)):
+            print(current_counter_index)
+            current_counter_index += step_counter
+
+    # - récupérer les informations des fichiers de configurations
+    # - création des fichiers de sortie SVD, SVDE, SVDNE
+
+def construct_new_line(path_seuil, interval, line, sep, index):
+    begin, end = interval
+
+    line_data = line.split(';')
+    seuil = line_data[0]
+    metrics = line_data[begin+1:end+1]
+
+    with open(path_seuil, "r") as seuil_file:
+        seuil_learned = int(seuil_file.readline().strip())
+
+    if seuil_learned > int(seuil):
+        line = '0'
+    else:
+        line = '1'
+
+    for idx, val in enumerate(metrics):
+        if index:
+            line += " " + str(idx + 1)
+        line += sep
+        line += val
+    line += '\n'
+
+    return line
+
+def generate_data_svm(_filename, _interval, _choice, _scenes = scenes, _nb_zones = 4, _percent = 1, _sep=':', _index=True):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    output_folder = output_train_filename.split('/')[0]
+
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    scenes = os.listdir(path)
+
+    for id_scene, folder_scene in enumerate(scenes):
+        scene_path = path + "/" + folder_scene
+
+        zones_folder = []
+        # create zones list
+        for index in zones:
+            index_str = str(index)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            zones_folder.append("zone"+index_str)
+
+        # shuffle list of zones (=> randomly choose zones)
+        random.shuffle(zones_folder)
+
+        for id_zone, zone_folder in enumerate(zones_folder):
+            zone_path = scene_path + "/" + zone_folder
+            data_filename = file_choice[choices.index(_choice)]
+            data_file_path = zone_path + "/" + data_filename
+
+             # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            lines_indexes = np.arange(num_lines)
+            random.shuffle(lines_indexes)
+
+            path_seuil = zone_path + "/" + seuil_expe_filename
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for index in lines_indexes:
+                line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
+
+                percent = counter / num_lines
+                
+                if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                    train_file.write(line)
+                else:
+                    test_file.write(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file.close()
+    test_file.close()
+
+
+
+def main():
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python generate_data.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:", ["help=", "output=", "interval=", "kind=", "scenes=", "nb_zones=", "percent=", "sep=", "rowindex="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python generate_data.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python generate_data.py --output xxxx --interval 0,20  --kind svdne --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
+            sys.exit()
+        elif o in ("-o", "--output"):
+            p_filename = a
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+        elif o in ("-k", "--kind"):
+            p_kind = a
+        elif o in ("-s", "--scenes"):
+            p_scenes = a.split(',')
+        elif o in ("-n", "--nb_zones"):
+            p_nb_zones = int(a)
+        elif o in ("-p", "--percent"):
+            p_percent = float(a)
+        elif o == "--sep":
+            p_sep = a
+        elif o in ("-r", "--rowindex"):
+            if int(a) == 1:
+                p_rowindex = True
+            else:
+                p_rowindex = False
+        else:
+            assert False, "unhandled option"
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indexes.index(scene_id.strip())
+        scenes_selected.append(scenes[index])
+
+    for scene in scenes_selected:
+        print(scene)
+
+    # create database using img folder (generate first time only)
+    generate_data_svm(p_filename, p_interval, p_kind, scenes_selected, p_nb_zones, p_percent, p_sep, p_rowindex)
+
+if __name__== "__main__":
+    main()

+ 43 - 0
predictSVM.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+# selection of six scenes
+scenes="Appart1opt02, Bureau1, Cendrier, PNDVuePlongeante, SdbDroite, Selles"
+
+for size in {"4","8","16","26","32","40"}; do
+
+  start=0
+  for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    for zones in {"1, 3, 7, 9","0, 2, 7, 8, 9","2, 6, 8, 10, 13, 15","1, 2, 4, 7, 9, 10, 13, 15"}; do
+
+        zones_str="${zones//, /-}"
+
+        for mode in {"svd","svdn","svdne"}; do
+
+
+            MODEL_FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_zones${zones_str}.train.model"
+            TEST_FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_zones${zones_str}.test"
+
+             ./prediction.sh ${TEST_FILENAME} ${MODEL_FILENAME} &
+
+        done
+    done
+
+    start=$(($start+50))
+  done
+
+done

+ 40 - 0
predictSVM_random.sh

@@ -0,0 +1,40 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+# selection of six scenes
+scenes="Appart1opt02, Bureau1, Cendrier, PNDVuePlongeante, SdbDroite, Selles"
+
+for size in {"4","8","16","26","32","40"}; do
+
+  start=0
+  for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    for nb_zones in {3,4,5,6,7,8,9,10}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            MODEL_FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}.train.model"
+            TEST_FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}.test"
+
+            ./prediction.sh ${TEST_FILENAME} ${MODEL_FILENAME} &
+
+        done
+    done
+
+    start=$(($start+50))
+  done
+
+done

+ 52 - 0
prediction.py

@@ -0,0 +1,52 @@
+from sklearn.externals import joblib
+
+import numpy as np
+
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+import sys, os, getopt
+
+output_model_folder = './saved_models/'
+
+def main():
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python smv_model_train.py --data xxxx.csv --model xxxx.joblib --output xxxx')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hd:m:o:", ["help=", "data=", "model=", "output="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python smv_model_train.py --data xxxx.csv --model xxxx.joblib --output xxxx')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python smv_model_train.py --data xxxx.csv --model xxxx.joblib --output xxxx')
+            sys.exit()
+        elif o in ("-d", "--data"):
+            p_data_file = a
+        elif o in ("-m", "--model"):
+            p_model_file = a
+        elif o in ("-o", "--output"):
+            p_output = a
+        else:
+            assert False, "unhandled option"
+
+    if not os.path.exists(output_model_folder):
+        os.makedirs(output_model_folder)
+
+    dataset = pd.read_csv(p_data_file, header=None, sep=";")
+
+    y_dataset = dataset.ix[:,0]
+    x_dataset = dataset.ix[:,1:]
+
+    model = joblib.load(p_model_file) 
+
+    y_pred = model.predict(x_dataset)
+
+    print("Accuracy found %s " % str(accuracy_score(y_dataset, y_pred)))
+
+if __name__== "__main__":
+    main()

+ 2 - 1
requirements.txt

@@ -7,4 +7,5 @@ image_slicer
 Pillow
 pydot
 matplotlib
-path.py
+path.py
+pandas

BIN
saved_models/svm_model.joblib


+ 71 - 0
svm_model_train.py

@@ -0,0 +1,71 @@
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
+
+import sklearn.svm as svm
+from sklearn.externals import joblib
+
+import numpy as np
+
+
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+import sys, os, getopt
+
+output_model_folder = './saved_models/'
+
+def get_best_model(X_train, y_train):
+    parameters = {'kernel':['rbf'], 'C': np.arange(1, 20)}
+    svc = svm.SVC(gamma="scale")
+    clf = GridSearchCV(svc, parameters, cv=5, scoring='accuracy', verbose=10)
+
+    clf.fit(X_train, y_train)
+
+    model = clf.best_estimator_
+
+    return model
+
+
+def main():
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python smv_model_train.py --data xxxx --output xxxx')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hd:o:", ["help=", "data=", "output="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python smv_model_train.py --data xxxx --output xxxx')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python smv_model_train.py --data xxxx --output xxxx')
+            sys.exit()
+        elif o in ("-d", "--data"):
+            p_data_file = a
+        elif o in ("-o", "--output"):
+            p_output = a
+        else:
+            assert False, "unhandled option"
+
+    if not os.path.exists(output_model_folder):
+        os.makedirs(output_model_folder)
+
+    dataset = pd.read_csv(p_data_file, header=None, sep=";")
+
+    y_dataset = dataset.ix[:,0]
+    x_dataset = dataset.ix[:,1:]
+
+    X_train, X_test, y_train, y_test = train_test_split(x_dataset, y_dataset, test_size=0.4, random_state=42)
+
+    svm_model = get_best_model(X_train, y_train)
+
+    y_pred = svm_model.predict(X_test)
+
+    print("Accuracy found %s " % str(accuracy_score(y_test, y_pred)))
+
+    joblib.dump(svm_model, output_model_folder + p_output + '.joblib') 
+
+if __name__== "__main__":
+    main()

File diff suppressed because it is too large
+ 0 - 15616
test.test


File diff suppressed because it is too large
+ 0 - 11264
test.train


+ 67 - 0
testModelByScene.sh

@@ -0,0 +1,67 @@
+#!/bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No first argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No second argument supplied"
+    echo "Need of model input"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No third argument supplied"
+    echo "Need of separator char : ':', ';'"
+    exit 1
+fi
+
+if [ -z "$4" ]
+  then
+    echo "No fourth argument supplied"
+    echo "Need of index row indication : 0 or 1"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+INPUT_MODEL=$2
+INPUT_SEP=$3
+INPUT_ROW=$4
+
+
+for size in {"4","8","16","26","32","40"}; do
+
+  start=0
+  for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+  
+    zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
+    zones_str="${zones//, /-}"
+
+    for scene in {"A","B","C","D","E","F","G","H","I"}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+            FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_scene${scene}"
+
+            echo $FILENAME
+            python generate_data_svm.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scene}" --zones "${zones}" --percent 1 --sep "${INPUT_SEP}" --rowindex "${INPUT_ROW}"
+            python prediction.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}.prediction"
+
+        done
+    done
+
+    start=$(($start+50))
+  done
+
+done