@@ -4,11 +4,11 @@ from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import sklearn.svm as svm
+from sklearn.utils import shuffle
from sklearn.externals import joblib
import numpy as np
-
import pandas as pd
from sklearn.metrics import accuracy_score
@@ -57,8 +57,23 @@ def main():
# get and split data
dataset = pd.read_csv(p_data_file, header=None, sep=";")
- y_dataset = dataset.ix[:,0]
- x_dataset = dataset.ix[:,1:]
+ # default first shuffle of data
+ dataset = shuffle(dataset)
+
+ # get dataset with equal number of classes occurences
+ noisy_df = dataset[dataset.ix[:, 0] == 1]
+ not_noisy_df = dataset[dataset.ix[:, 0] == 0]
+ nb_not_noisy = len(not_noisy_df.index)
+ final_df = pd.concat([not_noisy_df, noisy_df[0:nb_not_noisy]])
+ # shuffle data another time
+ final_df = shuffle(final_df)
+ print(len(final_df.index))
+ y_dataset = final_df.ix[:,0]
+ x_dataset = final_df.ix[:,1:]
X_train, X_test, y_train, y_test = train_test_split(x_dataset, y_dataset, test_size=0.3333, random_state=42)
@@ -0,0 +1,60 @@
+#!/bin/bash
+if [ -z "$1" ]
+ then
+ echo "No argument supplied"
+ echo "Need of vector size"
+ exit 1
+fi
+if [ -z "$2" ]; then
+  echo "Need of model output name"; exit 1; fi
+VECTOR_SIZE=$1
+INPUT_MODEL_NAME=$2
+# selection of six scenes
+scenes="A, B, C, D, E, F, G, H, I"
+for size in {"4","8","16","26","32","40"}; do
+ start=0
+ for counter in {0..4}; do
+ end=$(($start+$size))
+ if [ "$end" -gt "$VECTOR_SIZE" ]; then
+ start=$(($VECTOR_SIZE-$size))
+ end=$(($VECTOR_SIZE))
+ fi
+ for nb_zones in {2,3,4,5,6,7,8,9,10}; do
+ for mode in {"svd","svdn","svdne"}; do
+ FILENAME="data_svm/data_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_random"
+ MODEL_NAME="saved_models/${INPUT_MODEL_NAME}_${mode}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}"
+ echo $FILENAME
+ python generate_data_svm_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
+ python ensemble_model_train.py --data ${FILENAME}.train --output ${MODEL_NAME}
+        bash testModelByScene.sh "${start}" "${end}" "${MODEL_NAME}.joblib" "${mode}" >> ${FILENAME}.tex
+ done
+ start=$(($start+50))
+done
@@ -42,6 +42,8 @@ for size in {"4","8","16","26","32","40"}; do
python generate_data_svm_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
python svm_model_train.py --data ${FILENAME}.train --output ${MODEL_NAME} &
+ # add computation of scenes score and LaTeX display of its
done
if [ -z "$2" ]
@@ -201,7 +201,7 @@ def main():
scenes_selected.append(scenes[index])
for scene in scenes_selected:
- print(scene)
+ print(scene + " : ")
# create database using img folder (generate first time only)
generate_data_svm(p_filename, p_interval, p_kind, scenes_selected, p_zones, p_percent, p_sep, p_rowindex)
@@ -8,8 +8,6 @@ if [ -z "$1" ]
fi
VECTOR_SIZE=$1
-# selection of six scenes
-scenes="Appart1opt02, Bureau1, Cendrier, PNDVuePlongeante, SdbDroite, Selles"
for size in {"4","8","16","26","32","40"}; do
@@ -48,7 +48,7 @@ def main():
accuracy = accuracy_score(y_dataset, y_pred)
- print("Accuracy found %s " % str(accuracy))
+ print(str(accuracy))
with open(p_output, 'w') as f:
f.write("Accuracy found %s " % str(accuracy))
@@ -1,12 +1,13 @@
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
@@ -54,8 +55,21 @@ def main():
X_train, X_test, y_train, y_test = train_test_split(x_dataset, y_dataset, test_size=0.4, random_state=42)
@@ -31,18 +31,16 @@ fi
INPUT_BEGIN=$1
INPUT_END=$2
INPUT_MODEL=$3
-INPUT_MODE$4
+INPUT_MODE=$4
zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
for scene in {"A","B","C","D","E","F","G","H","I"}; do
- for mode in {"svd","svdn","svdne"}; do
+ FILENAME="data_svm/data_${mode}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
- FILENAME="data_svm/data_${mode}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
+ python generate_data_svm.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
- python generate_data_svm.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${mode} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
+ python prediction.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}.prediction"
- python prediction.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${mode}.prediction"
- done