4 anni fa · c409191b1c
--- a/custom_config.py
+++ b/custom_config.py
@@ -22,7 +22,7 @@ results_information_folder      = os.path.join(output_data_folder, 'results')
 
				 ## correlation_indices_folder      = 'corr_indices'
			
 
				 
			
 
				 # variables
			
 
				-features_choices_labels                 = features_choices_labels + ['filters_statistics']
			
 
				+features_choices_labels                 = features_choices_labels + ['filters_statistics', 'statistics_extended']
			
 
				 optimization_filters_result_filename    = 'optimization_comparisons_filters.csv'
			
 
				 optimization_attributes_result_filename = 'optimization_comparisons_attributes.csv'
			
 
				 
			
--- a/data_attributes.py
+++ b/data_attributes.py
@@ -99,15 +99,18 @@ def get_image_features(data_type, block):
 
				         bytes_data = np.array(block).tobytes()
			
 
				         compress_data = gzip.compress(bytes_data)
			
 
				 
			
 
				-        data.append(data, sys.getsizeof(compress_data))
			
 
				+        data = np.append(data, sys.getsizeof(compress_data))
			
 
				+
			
 
				+        lab_img = transform.get_LAB_L(block)
			
 
				+        arr = np.array(lab_img)
			
 
				 
			
 
				         # add sobel complexity (kernel size of 5)
			
 
				-        sobelx = cv2.Sobel(lab_img, cv2.CV_64F, 1, 0, ksize=5)
			
 
				-        sobely = cv2.Sobel(lab_img, cv2.CV_64F, 0, 1,ksize=5)
			
 
				+        sobelx = cv2.Sobel(arr, cv2.CV_64F, 1, 0, ksize=5)
			
 
				+        sobely = cv2.Sobel(arr, cv2.CV_64F, 0, 1,ksize=5)
			
 
				 
			
 
				         sobel_mag = np.array(np.hypot(sobelx, sobely), 'uint8')  # magnitude
			
 
				 
			
 
				-        data.append(data, np.std(sobel_mag))
			
 
				+        data = np.append(data, np.std(sobel_mag))
			
 
				 
			
 
				     if 'lab' in data_type:
			
 
				 
			
--- a/data_processing/generateAndTrain_maxwell_custom.sh
+++ b/data_processing/generateAndTrain_maxwell_custom.sh
@@ -1,58 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-if [ -z "$1" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of vector size"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$2" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of feature information"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$3" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of kind of data to use"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-size=$1
			
 
				-feature=$2
			
 
				-data=$3
			
 
				-
			
 
				-# selection of four scenes (only maxwell)
			
 
				-scenes="A, D, G, H"
			
 
				-
			
 
				-start=0
			
 
				-end=$size
			
 
				-
			
 
				-for nb_zones in {4,6,8,10,11,12}; do
			
 
				-
			
 
				-    for mode in {"svd","svdn","svdne"}; do
			
 
				-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
			
 
				-
			
 
				-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
			
 
				-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
			
 
				-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
			
 
				-
			
 
				-            echo $FILENAME
			
 
				-
			
 
				-            # only compute if necessary (perhaps server will fall.. Just in case)
			
 
				-            if grep -q "${MODEL_NAME}" "${result_filename}"; then
			
 
				-
			
 
				-                echo "${MODEL_NAME} results already generated..."
			
 
				-            else
			
 
				-                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                #python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
			
 
				-
			
 
				-                #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                #python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
			
 
				-            fi
			
 
				-        done
			
 
				-    done
			
 
				-done
			
--- a/data_processing/generateAndTrain_maxwell_custom_optimization.sh
+++ b/data_processing/generateAndTrain_maxwell_custom_optimization.sh
@@ -1,68 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-if [ -z "$1" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of vector size"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$2" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of feature information"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$3" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of kind of data to use"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$4" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Use of filters or attributes"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-
			
 
				-size=$1
			
 
				-feature=$2
			
 
				-data=$3
			
 
				-filter=$4
			
 
				-
			
 
				-
			
 
				-# selection of four scenes (only maxwell)
			
 
				-scenes="A, D, G, H"
			
 
				-result_filename="results/optimization_comparisons_${filter}.csv"
			
 
				-start=0
			
 
				-end=$size
			
 
				-
			
 
				-#for nb_zones in {4,6,8,10,12}; do
			
 
				-for nb_zones in {10,12}; do
			
 
				-
			
 
				-    for mode in {"svd","svdn","svdne"}; do
			
 
				-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
			
 
				-      
			
 
				-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}"
			
 
				-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}"
			
 
				-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}_min_max"
			
 
				-
			
 
				-            echo $FILENAME
			
 
				-
			
 
				-            # only compute if necessary (perhaps server will fall.. Just in case)
			
 
				-            if grep -q "${MODEL_NAME}" "${result_filename}"; then
			
 
				-
			
 
				-                echo "${MODEL_NAME} results already generated..."
			
 
				-            else
			
 
				-                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                
			
 
				-                echo "Train ${MODEL_NAME}"
			
 
				-                #python find_best_${filter}.py --data ${FILENAME} --choice ${model} &
			
 
				-            fi
			
 
				-        done
			
 
				-    done
			
 
				-done
			
--- a/find_best_attributes.py
+++ b/find_best_attributes.py
@@ -13,9 +13,9 @@ from sklearn.model_selection import GridSearchCV
 
				 from sklearn.linear_model import LogisticRegression
			
 
				 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
			
 
				 
			
 
				+import joblib
			
 
				 import sklearn.svm as svm
			
 
				 from sklearn.utils import shuffle
			
 
				-from sklearn.externals import joblib
			
 
				 from sklearn.metrics import roc_auc_score
			
 
				 from sklearn.model_selection import cross_val_score
			
 
				 
			
@@ -39,8 +39,8 @@ from optimization.checkpoints.BasicCheckpoint import BasicCheckpoint
 
				 # variables and parameters
			
 
				 models_list         = cfg.models_names_list
			
 
				 number_of_values    = 26
			
 
				-ils_iteration       = 1000
			
 
				-ls_iteration        = 20
			
 
				+ils_iteration       = 10
			
 
				+ls_iteration        = 5
			
 
				 
			
 
				 # default validator
			
 
				 def validator(solution):
			
@@ -52,7 +52,7 @@ def validator(solution):
 
				 
			
 
				 # init solution (26 attributes)
			
 
				 def init():
			
 
				-    return BinarySolution([], number_of_values).random(validator)
			
 
				+    return BinarySolution([], 26).random(validator)
			
 
				 
			
 
				 def loadDataset(filename):
			
 
				 
			
@@ -117,7 +117,7 @@ def main():
 
				     if not os.path.exists(cfg.output_logs_folder):
			
 
				         os.makedirs(cfg.output_logs_folder)
			
 
				 
			
 
				-    logging.basicConfig(format='%(asctime)s %(message)s', filename='logs/%s.log' % p_data_file.split('/')[-1], level=logging.DEBUG)
			
 
				+    logging.basicConfig(format='%(asctime)s %(message)s', filename='data/logs/%s.log' % p_data_file.split('/')[-1], level=logging.DEBUG)
			
 
				 
			
 
				     # define evaluate function here (need of data information)
			
 
				     def evaluate(solution):
			
--- a/generate/generate_all_data_file.py
+++ b/generate/generate_all_data_file.py
@@ -55,6 +55,7 @@ def generate_data_svd(data_type, mode, dataset, output):
 
				 
			
 
				         print(folder_scene)
			
 
				         scene_path = os.path.join(dataset, folder_scene)
			
 
				+        output_scene_path = os.path.join(output_data_folder, output, folder_scene)
			
 
				 
			
 
				         # getting output filename
			
 
				         output_svd_filename = data_type + "_" + mode + generic_output_file_svd
			
@@ -72,7 +73,11 @@ def generate_data_svd(data_type, mode, dataset, output):
 
				             current_zone = "zone"+index_str
			
 
				             zones_folder.append(current_zone)
			
 
				 
			
 
				-            zone_path = os.path.join(scene_path, current_zone)
			
 
				+            zone_path = os.path.join(output_scene_path, current_zone)
			
 
				+
			
 
				+            if not os.path.exists(zone_path):
			
 
				+                os.makedirs(zone_path)
			
 
				+
			
 
				             svd_file_path = os.path.join(zone_path, output_svd_filename)
			
 
				 
			
 
				             # add writer into list
			
--- a/models.py
+++ b/models.py
@@ -10,13 +10,14 @@ import sklearn.svm as svm
 
				 
			
 
				 def _get_best_model(X_train, y_train):
			
 
				 
			
 
				-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
			
 
				-    #Cs = [1, 2, 4, 8, 16, 32]
			
 
				-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
			
 
				+    #Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
			
 
				+    Cs = [1, 2, 4, 8, 16, 32]
			
 
				+    # gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
			
 
				+    gammas = [0.001, 0.1, 1, 10, 100]
			
 
				     param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
			
 
				 
			
 
				     svc = svm.SVC(probability=True)
			
 
				-    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=2)
			
 
				+    clf = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy', verbose=2)
			
 
				 
			
 
				     clf.fit(X_train, y_train)
			
 
				 
			
--- a/prediction/estimate_thresholds.py
+++ b/prediction/estimate_thresholds.py
@@ -0,0 +1,169 @@
 
				+# main imports
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+import sys, os, argparse
			
 
				+
			
 
				+# image processing
			
 
				+from PIL import Image
			
 
				+from ipfml import utils
			
 
				+from ipfml.processing import transform, segmentation
			
 
				+
			
 
				+import matplotlib.pyplot as plt
			
 
				+
			
 
				+# model imports
			
 
				+import joblib
			
 
				+
			
 
				+# modules and config imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+from modules.utils import data as dt
			
 
				+
			
 
				+from data_attributes import get_image_features
			
 
				+
			
 
				+zones_indices  = cfg.zones_indices
			
 
				+
			
 
				+def write_progress(progress):
			
 
				+    barWidth = 180
			
 
				+
			
 
				+    output_str = "["
			
 
				+    pos = barWidth * progress
			
 
				+    for i in range(barWidth):
			
 
				+        if i < pos:
			
 
				+           output_str = output_str + "="
			
 
				+        elif i == pos:
			
 
				+           output_str = output_str + ">"
			
 
				+        else:
			
 
				+            output_str = output_str + " "
			
 
				+
			
 
				+    output_str = output_str + "] " + str(int(progress * 100.0)) + " %\r"
			
 
				+    print(output_str)
			
 
				+    sys.stdout.write("\033[F")
			
 
				+
			
 
				+def main():
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(description="Read and compute model on scene in order to make predictions")
			
 
				+
			
 
				+    parser.add_argument('--folder', type=str, help='folder where scene data are stored', required=True)
			
 
				+    parser.add_argument('--model', type=str, help='model file', required=True)
			
 
				+    parser.add_argument('--solution', type=str, help='Data of solution to specify filters to use', required=True)
			
 
				+    parser.add_argument('--method', type=str, help='method name to used', choices=cfg.features_choices_labels, default=cfg.features_choices_labels[0], required=True)
			
 
				+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices, required=True)
			
 
				+    parser.add_argument('--n_stop', type=int, help='n consecutive prediction to stop', default=1)
			
 
				+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default='')
			
 
				+    parser.add_argument('--save', type=str, help='filename where to save input data', required=True)
			
 
				+    parser.add_argument('--label', type=str, help='label to use when saving thresholds', required=True)
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    p_model    = args.model
			
 
				+    p_solution   = list(map(int, args.solution.split(' ')))
			
 
				+    p_method   = args.method
			
 
				+    p_n_stop   = args.n_stop
			
 
				+    p_folder   = args.folder
			
 
				+    p_mode     = args.kind
			
 
				+    p_custom   = args.custom
			
 
				+    p_save     = args.save
			
 
				+    p_label    = args.label
			
 
				+
			
 
				+    if len(p_custom) > 0:
			
 
				+        # need to read min_max_file
			
 
				+        with open(p_custom, 'r') as f:
			
 
				+            min_val = float(f.readline().replace('\n', ''))
			
 
				+            max_val = float(f.readline().replace('\n', ''))
			
 
				+
			
 
				+    # 1. get scene name
			
 
				+    scene_path = p_folder
			
 
				+
			
 
				+    # 2. load model and compile it
			
 
				+
			
 
				+    # TODO : check kind of model
			
 
				+    model = joblib.load(p_model)
			
 
				+    # model.compile(loss='binary_crossentropy',
			
 
				+    #               optimizer='rmsprop',
			
 
				+    #               metrics=['accuracy'])
			
 
				+
			
 
				+    # 3. get indices kept by solution
			
 
				+    # get indices of attributes data to use (attributes selection from solution)
			
 
				+    indices = []
			
 
				+
			
 
				+    for index, value in enumerate(p_solution): 
			
 
				+        if value == 1: 
			
 
				+            indices.append(index)
			
 
				+
			
 
				+    # 4. prepare scene to predict
			
 
				+    estimated_thresholds = []
			
 
				+    n_estimated_thresholds = []
			
 
				+    zones_list = np.arange(16)
			
 
				+
			
 
				+    # 4. get estimated thresholds using model and specific method
			
 
				+    images_path = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
			
 
				+    number_of_images = len(images_path)
			
 
				+    image_indices = [ dt.get_scene_image_quality(img_path) for img_path in images_path ]
			
 
				+
			
 
				+    image_counter = 0
			
 
				+
			
 
				+
			
 
				+    # append empty list
			
 
				+    for _ in zones_list:
			
 
				+        estimated_thresholds.append(None)
			
 
				+        n_estimated_thresholds.append(0)
			
 
				+
			
 
				+    for img_i, img_path in enumerate(images_path):
			
 
				+
			
 
				+        blocks = segmentation.divide_in_blocks(Image.open(img_path), (200, 200))
			
 
				+
			
 
				+        for index, block in enumerate(blocks):
			
 
				+            
			
 
				+            if estimated_thresholds[index] is None:
			
 
				+                
			
 
				+                # check if prediction is possible
			
 
				+                data = np.array(get_image_features(p_method, np.array(block)))
			
 
				+
			
 
				+                if p_mode == 'svdn':
			
 
				+                    data = utils.normalize_arr_with_range(data)
			
 
				+
			
 
				+                if p_mode == 'svdne':
			
 
				+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
			
 
				+
			
 
				+                data = np.array(data)[indices]
			
 
				+
			
 
				+                #data = np.expand_dims(data, axis=0)
			
 
				+                #print(data.shape)
			
 
				+                
			
 
				+                prob = model.predict(np.array(data).reshape(1, -1))[0]
			
 
				+                #print(index, ':', image_indices[img_i], '=>', prob)
			
 
				+
			
 
				+                if prob < 0.5:
			
 
				+                    n_estimated_thresholds[index] += 1
			
 
				+
			
 
				+                    # if same number of detection is attempted
			
 
				+                    if n_estimated_thresholds[index] >= p_n_stop:
			
 
				+                        estimated_thresholds[index] = image_indices[img_i]
			
 
				+                else:
			
 
				+                    n_estimated_thresholds[index] = 0
			
 
				+
			
 
				+        # write progress bar
			
 
				+        write_progress((image_counter + 1) / number_of_images)
			
 
				+        
			
 
				+        image_counter = image_counter + 1
			
 
				+    
			
 
				+    # default label
			
 
				+    for i, _ in enumerate(zones_list):
			
 
				+        if estimated_thresholds[i] == None:
			
 
				+            estimated_thresholds[i] = image_indices[-1]
			
 
				+
			
 
				+    # 6. save estimated thresholds into specific file
			
 
				+    print(estimated_thresholds)
			
 
				+    print(p_save)
			
 
				+    if p_save is not None:
			
 
				+        with open(p_save, 'a') as f:
			
 
				+            f.write(p_label + ';')
			
 
				+
			
 
				+            for t in estimated_thresholds:
			
 
				+                f.write(str(t) + ';')
			
 
				+            f.write('\n')
			
 
				+    
			
 
				+
			
 
				+if __name__== "__main__":
			
 
				+    main()
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,4 @@ matplotlib
 
				 path.py
			
 
				 pandas
			
 
				 opencv-python
			
 
				-gzip
			
 
				+joblib
			
--- a/train_model.py
+++ b/train_model.py
@@ -23,7 +23,7 @@ import custom_config as cfg
 
				 import models as mdl
			
 
				 
			
 
				 # variables and parameters
			
 
				-saved_models_folder = cfg.saved_models_folder
			
 
				+saved_models_folder = cfg.output_models
			
 
				 models_list         = cfg.models_names_list
			
 
				 
			
 
				 current_dirpath     = os.getcwd()
			
--- a/train_model_attributes.py
+++ b/train_model_attributes.py
@@ -9,9 +9,9 @@ from sklearn.model_selection import GridSearchCV
 
				 from sklearn.linear_model import LogisticRegression
			
 
				 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
			
 
				 
			
 
				+import joblib
			
 
				 import sklearn.svm as svm
			
 
				 from sklearn.utils import shuffle
			
 
				-from sklearn.externals import joblib
			
 
				 from sklearn.metrics import accuracy_score, f1_score
			
 
				 from sklearn.model_selection import cross_val_score
			
 
				 
			
@@ -22,7 +22,7 @@ import custom_config as cfg
 
				 import models as mdl
			
 
				 
			
 
				 # variables and parameters
			
 
				-saved_models_folder = cfg.saved_models_folder
			
 
				+saved_models_folder = cfg.output_models
			
 
				 models_list         = cfg.models_names_list
			
 
				 
			
 
				 current_dirpath     = os.getcwd()
			
@@ -33,7 +33,7 @@ def main():
 
				 
			
 
				     parser = argparse.ArgumentParser(description="Train SKLearn model and save it into .joblib file")
			
 
				 
			
 
				-    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)')
			
 
				+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)')
			
 
				     parser.add_argument('--output', type=str, help='output file name desired for model (without .joblib extension)')
			
 
				     parser.add_argument('--choice', type=str, help='model choice from list of choices', choices=models_list)
			
 
				     parser.add_argument('--solution', type=str, help='Data of solution to specify filters to use')
			
@@ -59,12 +59,12 @@ def main():
 
				     dataset_test = shuffle(dataset_test)
			
 
				 
			
 
				     # get dataset with equal number of classes occurences
			
 
				-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
			
 
				-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
			
 
				+    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
			
 
				+    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
			
 
				     nb_noisy_train = len(noisy_df_train.index)
			
 
				 
			
 
				-    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
			
 
				-    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
			
 
				+    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
			
 
				+    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
			
 
				     nb_noisy_test = len(noisy_df_test.index)
			
 
				 
			
 
				     final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
			
@@ -78,11 +78,11 @@ def main():
 
				     final_df_test_size = len(final_df_test.index)
			
 
				 
			
 
				     # use of the whole data set for training
			
 
				-    x_dataset_train = final_df_train.ix[:,1:]
			
 
				-    x_dataset_test = final_df_test.ix[:,1:]
			
 
				+    x_dataset_train = final_df_train.iloc[:,1:]
			
 
				+    x_dataset_test = final_df_test.iloc[:,1:]
			
 
				 
			
 
				-    y_dataset_train = final_df_train.ix[:,0]
			
 
				-    y_dataset_test = final_df_test.ix[:,0]
			
 
				+    y_dataset_train = final_df_train.iloc[:,0]
			
 
				+    y_dataset_test = final_df_test.iloc[:,0]
			
 
				 
			
 
				     # get indices of filters data to use (filters selection from solution)
			
 
				     indices = []
			
--- a/train_model_filters.py
+++ b/train_model_filters.py
@@ -9,9 +9,9 @@ from sklearn.model_selection import GridSearchCV
 
				 from sklearn.linear_model import LogisticRegression
			
 
				 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
			
 
				 
			
 
				+import joblib
			
 
				 import sklearn.svm as svm
			
 
				 from sklearn.utils import shuffle
			
 
				-from sklearn.externals import joblib
			
 
				 from sklearn.metrics import accuracy_score, f1_score
			
 
				 from sklearn.model_selection import cross_val_score
			
 
				 
			
@@ -22,7 +22,7 @@ import custom_config as cfg
 
				 import models as mdl
			
 
				 
			
 
				 # variables and parameters
			
 
				-saved_models_folder = cfg.saved_models_folder
			
 
				+saved_models_folder = cfg.output_models
			
 
				 models_list         = cfg.models_names_list
			
 
				 
			
 
				 current_dirpath     = os.getcwd()