Jérôme BUISINE, 4 years ago
Parent
Commit 5016614735
7 changed files with 53 additions and 20 deletions
  1. custom_config.py (+1 -1)
  2. data_attributes.py (+1 -1)
  3. find_best_attributes.py (+5 -4)
  4. find_best_filters.py (+4 -4)
  5. models.py (+38 -6)
  6. train_model_attributes.py (+2 -2)
  7. train_model_filters.py (+2 -2)

custom_config.py (+1 -1)

@@ -27,7 +27,7 @@ optimization_filters_result_filename    = 'optimization_comparisons_filters.csv'
 optimization_attributes_result_filename = 'optimization_comparisons_attributes.csv'
 
 filter_reduction_choices                = ['attributes', 'filters']
-models_names_list                       = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
+models_names_list                       = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras", "svm_gpu"]
 
 ## models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
 ## normalization_choices           = ['svd', 'svdn', 'svdne']

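Adding "svm_gpu" to models_names_list exposes the new GPU-backed trainer (defined in the models.py diff below) wherever a model name is validated against this list. A minimal sketch of the assumed wiring; the train helper and its validation step are illustrative, not part of this commit:

import custom_config as cfg   # repo module changed above
import models                 # repo module changed below

def train(choice, X_train, y_train):
    # reject names not declared in custom_config.models_names_list
    if choice not in cfg.models_names_list:
        raise ValueError('unknown model: {0}'.format(choice))
    return models.get_trained_model(choice, X_train, y_train)
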
data_attributes.py (+1 -1)

@@ -124,7 +124,7 @@ def w2d(arr, mode='haar', level=1):
     imArray = arr
 
     sigma = restoration.estimate_sigma(imArray, average_sigmas=True, multichannel=False)
-    imArray_H = restoration.denoise_wavelet(imArray, sigma=sigma, wavelet='db1', mode='soft', 
+    imArray_H = restoration.denoise_wavelet(imArray, sigma=sigma, wavelet='db1', mode='hard', 
         wavelet_levels=2, 
         multichannel=False, 
         convert2ycbcr=False, 

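The switch from mode='soft' to mode='hard' changes the wavelet thresholding rule: hard thresholding zeroes coefficients below the threshold and keeps the rest unchanged, while soft thresholding also shrinks the surviving coefficients toward zero. A minimal standalone sketch using the same scikit-image calls as w2d (the random array is only a stand-in for the real input; multichannel is the pre-0.19 scikit-image parameter this code uses):

import numpy as np
from skimage import restoration

arr = np.random.rand(64, 64)  # stand-in for the grayscale input array

# estimate the noise level, then denoise with hard thresholding
sigma = restoration.estimate_sigma(arr, average_sigmas=True, multichannel=False)
denoised = restoration.denoise_wavelet(arr, sigma=sigma, wavelet='db1',
                                       mode='hard', wavelet_levels=2,
                                       multichannel=False, convert2ycbcr=False)
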
find_best_attributes.py (+5 -4)

@@ -69,14 +69,15 @@ def loadDataset(filename):
     # get dataset with equal number of classes occurences
     noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
     not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
-    nb_noisy_train = len(noisy_df_train.index)
+    #nb_noisy_train = len(noisy_df_train.index)
 
     noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
     not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
-    nb_noisy_test = len(noisy_df_test.index)
+    #nb_noisy_test = len(noisy_df_test.index)
 
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+    # use of all data
+    final_df_train = pd.concat([not_noisy_df_train, noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test, noisy_df_test])
 
     # shuffle data another time
     final_df_train = shuffle(final_df_train)

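Before this change, loadDataset truncated the not-noisy rows to the noisy-row count so both classes had equal occurrences; it now keeps every row and leaves the imbalance to class_weight='balanced' in models.py below. A toy sketch contrasting the two behaviours:

import pandas as pd
from sklearn.utils import shuffle

df = pd.DataFrame({'label': [1, 1, 0, 0, 0, 0], 'x': range(6)})
noisy_df = df[df.iloc[:, 0] == 1]
not_noisy_df = df[df.iloc[:, 0] == 0]

# old behaviour: cut the majority class down to the minority count
balanced = pd.concat([not_noisy_df[0:len(noisy_df.index)], noisy_df])

# new behaviour: keep all rows, then shuffle
full = shuffle(pd.concat([not_noisy_df, noisy_df]))
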
find_best_filters.py (+4 -4)

@@ -68,14 +68,14 @@ def loadDataset(filename):
     # get dataset with equal number of classes occurences
     noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
     not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
-    nb_noisy_train = len(noisy_df_train.index)
+    #nb_noisy_train = len(noisy_df_train.index)
 
     noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
     not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
-    nb_noisy_test = len(noisy_df_test.index)
+    #nb_noisy_test = len(noisy_df_test.index)
 
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+    final_df_train = pd.concat([not_noisy_df_train, noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test, noisy_df_test])
 
     # shuffle data another time
     final_df_train = shuffle(final_df_train)

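This is the same switch from balanced truncation to full-data concatenation as in find_best_attributes.py; the toy sketch above applies here unchanged.
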
models.py (+38 -6)

@@ -6,18 +6,27 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.feature_selection import RFECV
 import sklearn.svm as svm
+from sklearn.metrics import accuracy_score
+from thundersvm import SVC
 
+# variables and parameters
+n_predict = 0
+
+def my_accuracy_scorer(*args):
+    global n_predict
+    score = accuracy_score(*args)
+    print('{0} - Score is {1}'.format(n_predict, score))
+    n_predict += 1
+    return score
 
 def _get_best_model(X_train, y_train):
 
-    #Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    Cs = [1, 2, 4, 8, 16, 32]
-    # gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    gammas = [0.001, 0.1, 1, 10, 100]
+    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
+    gammas = [0.001, 0.01, 0.1, 5, 10, 100]
     param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
 
-    svc = svm.SVC(probability=True)
-    clf = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy', verbose=2)
+    svc = svm.SVC(probability=True, class_weight='balanced')
+    clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
 
     clf.fit(X_train, y_train)
 
@@ -30,6 +39,26 @@ def svm_model(X_train, y_train):
     return _get_best_model(X_train, y_train)
 
 
+def _get_best_gpu_model(X_train, y_train):
+
+    Cs = [0.001, 0.01, 0.1, 1, 2, 5, 10, 100, 1000]
+    gammas = [0.001, 0.01, 0.1, 1, 2, 5, 10, 100]
+    param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
+
+    svc = SVC(probability=True, class_weight='balanced')
+    clf = GridSearchCV(svc, param_grid, cv=10, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
+
+    clf.fit(X_train, y_train)
+
+    model = clf.best_estimator_
+
+    return model
+
+def svm_gpu(X_train, y_train):
+
+    return _get_best_gpu_model(X_train, y_train)
+
+
 def ensemble_model(X_train, y_train):
 
     svm_model = _get_best_model(X_train, y_train)
@@ -72,6 +101,9 @@ def get_trained_model(choice, X_train, y_train):
     if choice == 'svm_model':
         return svm_model(X_train, y_train)
 
+    if choice == 'svm_gpu':
+        return svm_gpu(X_train, y_train)
+
     if choice == 'ensemble_model':
         return ensemble_model(X_train, y_train)
 

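Two hedged notes on the grid searches above. First, thundersvm's SVC mirrors the scikit-learn estimator API, which is why it can be dropped into GridSearchCV unchanged. Second, scikit-learn's scoring parameter expects a string or a callable with the signature scorer(estimator, X, y); a plain metric taking (y_true, y_pred), like my_accuracy_scorer, is normally wrapped with make_scorer to obtain that signature, and with n_jobs=-1 each worker process holds its own copy of the global n_predict counter, so the printed indices will not be globally sequential. A standalone sketch of the make_scorer wiring, with toy data, assuming counting scored folds is the intent:

from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.model_selection import GridSearchCV
import sklearn.svm as svm

def counting_accuracy(y_true, y_pred):
    # plain (y_true, y_pred) metric; make_scorer adapts it to scorer(estimator, X, y)
    score = accuracy_score(y_true, y_pred)
    print('Score is {0}'.format(score))
    return score

param_grid = {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]}
svc = svm.SVC(probability=True, class_weight='balanced')
clf = GridSearchCV(svc, param_grid, cv=5, verbose=1,
                   scoring=make_scorer(counting_accuracy), n_jobs=-1)

X, y = make_classification(n_samples=80, n_features=10, random_state=0)
clf.fit(X, y)
print(clf.best_params_)
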
train_model_attributes.py (+2 -2)

@@ -67,8 +67,8 @@ def main():
     not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
     nb_noisy_test = len(noisy_df_test.index)
 
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+    final_df_train = pd.concat([not_noisy_df_train, noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test, noisy_df_test])
 
     # shuffle data another time
     final_df_train = shuffle(final_df_train)

train_model_filters.py (+2 -2)

@@ -67,8 +67,8 @@ def main():
     not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
     nb_noisy_test = len(noisy_df_test.index)
 
-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+    final_df_train = pd.concat([not_noisy_df_train, noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test, noisy_df_test])
 
     # shuffle data another time
     final_df_train = shuffle(final_df_train)
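
Note: the context lines above still index with dataset_test.ix[:, 0]; pandas deprecated .ix in 0.20 and removed it in 1.0, so .iloc (as already used in train_model_attributes.py) is the drop-in replacement.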