Browse Source

Add use of prediction

Jérôme BUISINE 5 years ago
parent
commit
cc1b759bc7
3 changed files with 16 additions and 28 deletions
  1. 7 14
      models.py
  2. 0 13
      test_rfe.py
  3. 9 1
      train_model.py

+ 7 - 14
models.py

@@ -65,26 +65,19 @@ def ensemble_model_v2(X_train, y_train):
 
 def rfe_svm_model(X_train, y_train, n_components=1):
 
-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    param_grid = [{'estimator__C': Cs, 'estimator__gamma' : gammas}]
+    # Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
+    # gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
+    # param_grid = [{'estimator__C': Cs, 'estimator__gamma' : gammas}]
+
+    gammas = [0.001, 0.01, 0.1]
+    param_grid = [{'estimator__gamma' : gammas}]
 
     estimator = svm.SVC(kernel="linear")
     selector = RFECV(estimator, step=1, cv=4, verbose=0)
     clf = GridSearchCV(selector, param_grid, cv=5, verbose=1)
     clf.fit(X_train, y_train)
 
-    print(clf.best_estimator_)
-    print('------------------------------')
-    print(clf.best_estimator_.n_features_)
-    print('------------------------------')
-    print(clf.best_estimator_.ranking_)
-    print('------------------------------')
-    print(clf.best_estimator_.support_)
-    print('------------------------------')
-    print(clf.best_estimator_.grid_scores_)
-
-    return clf.best_estimator_.estimator_
+    return (clf.best_estimator_, clf.best_estimator_.support_)
 
 
 def get_trained_model(choice, X_train, y_train):

+ 0 - 13
test_rfe.py

@@ -1,13 +0,0 @@
-from sklearn.datasets import make_friedman1
-from sklearn.feature_selection import RFECV
-from sklearn.model_selection import GridSearchCV
-from sklearn.svm import SVR
-X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
-param_grid = [{'estimator__C': [0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]}]
-estimator = SVR(kernel="linear")
-selector = RFECV(estimator, step=1, cv=4)
-clf = GridSearchCV(selector, param_grid, cv=7)
-clf.fit(X, y)
-print(clf.best_estimator_.estimator_)
-print(clf.best_estimator_.grid_scores_)
-print(clf.best_estimator_.ranking_)

+ 9 - 1
train_model.py

@@ -88,7 +88,11 @@ def main():
 
     print("-------------------------------------------")
     print("Train dataset size: ", final_df_train_size)
-    model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
+    if p_choice == 'rfe_svm_model': 
+        model, indices = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
+        selected_indices = [(i+1) for i in np.arange(len(indices)) if indices[i] == True]
+    else:
+        model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
 
     #######################
     # 3. Fit model : use of cross validation to fit model
@@ -112,6 +116,10 @@ def main():
 
     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
 
+    if p_choice == 'rfe_svm_model': 
+        X_test = X_test.loc[:, selected_indices]
+        X_val = X_val.loc[:, selected_indices]
+
     y_test_model = model.predict(X_test)
     y_val_model = model.predict(X_val)