Parcourir la source

Scripts updated

Jerome Buisine il y a 5 ans
Parent
commit
57e264ffb9

+ 18 - 3
ensemble_model_train.py

@@ -4,11 +4,11 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
 
 import sklearn.svm as svm
+from sklearn.utils import shuffle
 from sklearn.externals import joblib
 
 import numpy as np
 
-
 import pandas as pd
 from sklearn.metrics import accuracy_score
 
@@ -57,8 +57,23 @@ def main():
     # get and split data
     dataset = pd.read_csv(p_data_file, header=None, sep=";")
 
-    y_dataset = dataset.ix[:,0]
-    x_dataset = dataset.ix[:,1:]
+    # shuffle the data a first time
+    dataset = shuffle(dataset)
+    
+    # get dataset with an equal number of occurrences of each class
+    noisy_df = dataset[dataset.ix[:, 0] == 1]
+    not_noisy_df = dataset[dataset.ix[:, 0] == 0]
+    nb_not_noisy = len(not_noisy_df.index)
+
+    final_df = pd.concat([not_noisy_df, noisy_df[0:nb_not_noisy]])
+  
+    # shuffle data another time
+    final_df = shuffle(final_df)
+    
+    print(len(final_df.index))
+
+    y_dataset = final_df.ix[:,0]
+    x_dataset = final_df.ix[:,1:]
 
     X_train, X_test, y_train, y_test = train_test_split(x_dataset, y_dataset, test_size=0.3333, random_state=42)
 

+ 60 - 0
generateAndTrainEnsemble_random.sh

@@ -0,0 +1,60 @@
+#! bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of model output name"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+INPUT_MODEL_NAME=$2
+
+# selection of six scenes
+scenes="A, B, C, D, E, F, G, H, I"
+
+for size in {"4","8","16","26","32","40"}; do
+
+  start=0
+  for counter in {0..4}; do
+    end=$(($start+$size))
+
+    if [ "$end" -gt "$VECTOR_SIZE" ]; then
+        start=$(($VECTOR_SIZE-$size))
+        end=$(($VECTOR_SIZE))
+    fi
+
+    for nb_zones in {2,3,4,5,6,7,8,9,10}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_random"
+            MODEL_NAME="saved_models/${INPUT_MODEL_NAME}_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}"
+
+            echo $FILENAME
+            python generate_data_svm_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
+            python ensemble_model_train.py --data ${FILENAME}.train --output ${MODEL_NAME}
+            bash testModelByScene.sh "${begin}" "${end}" "${MODEL_NAME}.joblib" "${mode}" >> ${FILENAME}.tex
+
+        done
+    done
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of model output name"
+    exit 1
+fi
+
+VECTOR_SIZE=$1
+INPUT_MODEL_NAME=$2
+    start=$(($start+50))
+  done
+
+done

+ 2 - 0
generateAndTrainSVM_random.sh

@@ -42,6 +42,8 @@ for size in {"4","8","16","26","32","40"}; do
             python generate_data_svm_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
             python svm_model_train.py --data ${FILENAME}.train --output ${MODEL_NAME} &
 
+            # add computation of the scene scores and their LaTeX display
+
         done
     done
 if [ -z "$2" ]

+ 1 - 1
generate_data_svm.py

@@ -201,7 +201,7 @@ def main():
         scenes_selected.append(scenes[index])
 
     for scene in scenes_selected:
-        print(scene)
+        print(scene + " : ")
 
     # create database using img folder (generate first time only)
     generate_data_svm(p_filename, p_interval, p_kind, scenes_selected, p_zones, p_percent, p_sep, p_rowindex)

+ 0 - 2
predictSVM_random.sh

@@ -8,8 +8,6 @@ if [ -z "$1" ]
 fi
 
 VECTOR_SIZE=$1
-# selection of six scenes
-scenes="Appart1opt02, Bureau1, Cendrier, PNDVuePlongeante, SdbDroite, Selles"
 
 for size in {"4","8","16","26","32","40"}; do
 

+ 1 - 1
prediction.py

@@ -48,7 +48,7 @@ def main():
 
     accuracy = accuracy_score(y_dataset, y_pred)
 
-    print("Accuracy found %s " % str(accuracy))
+    print(str(accuracy))
 
     with open(p_output, 'w') as f:
         f.write("Accuracy found %s " % str(accuracy))

+ 17 - 3
svm_model_train.py

@@ -1,12 +1,13 @@
 from sklearn.model_selection import train_test_split
 from sklearn.model_selection import GridSearchCV
 
+from sklearn.utils import shuffle
+
 import sklearn.svm as svm
 from sklearn.externals import joblib
 
 import numpy as np
 
-
 import pandas as pd
 from sklearn.metrics import accuracy_score
 
@@ -54,8 +55,21 @@ def main():
 
     dataset = pd.read_csv(p_data_file, header=None, sep=";")
 
-    y_dataset = dataset.ix[:,0]
-    x_dataset = dataset.ix[:,1:]
+    # default first shuffle of data
+    dataset = shuffle(dataset)
+    
+    # get dataset with an equal number of occurrences of each class
+    noisy_df = dataset[dataset.ix[:, 0] == 1]
+    not_noisy_df = dataset[dataset.ix[:, 0] == 0]
+    nb_not_noisy = len(not_noisy_df.index)
+
+    final_df = pd.concat([not_noisy_df, noisy_df[0:nb_not_noisy]])
+  
+    # shuffle data another time
+    final_df = shuffle(final_df)
+
+    y_dataset = final_df.ix[:,0]
+    x_dataset = final_df.ix[:,1:]
 
     X_train, X_test, y_train, y_test = train_test_split(x_dataset, y_dataset, test_size=0.4, random_state=42)
 

+ 4 - 6
testModelByScene.sh

@@ -31,18 +31,16 @@ fi
 INPUT_BEGIN=$1
 INPUT_END=$2
 INPUT_MODEL=$3
-INPUT_MODE$4
+INPUT_MODE=$4
 
 zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
 
 for scene in {"A","B","C","D","E","F","G","H","I"}; do
 
-  for mode in {"svd","svdn","svdne"}; do
+  FILENAME="data_svm/data_${mode}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
 
-      FILENAME="data_svm/data_${mode}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
+  python generate_data_svm.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
 
-      python generate_data_svm.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${mode} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
+  python prediction.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}.prediction"
 
-      python prediction.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${mode}.prediction"
-  done
 done