6 years ago · cc1b759bc7
--- a/models.py
+++ b/models.py
@@ -65,26 +65,35 @@ def ensemble_model_v2(X_train, y_train):
 
 def rfe_svm_model(X_train, y_train, n_components=1):
+    """Fit a grid-searched RFECV feature selector wrapped around a linear SVC.
+
+    Grid-searches the SVC regularisation strength C (5-fold) over an RFECV
+    selector (step=1, 4-fold) and fits it on the training data.
+
+    Args:
+        X_train: training feature matrix passed to GridSearchCV.fit.
+        y_train: training labels passed to GridSearchCV.fit.
+        n_components: unused; kept so existing callers keep working.
+
+    Returns:
+        Tuple (selector, support): the best fitted RFECV and its boolean
+        support_ mask of retained features.
+    """
 
-    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
-    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
-    param_grid = [{'estimator__C': Cs, 'estimator__gamma' : gammas}]
+    # Tune C rather than gamma: SVC ignores gamma when kernel="linear", so a
+    # gamma grid only refits identical models. The grid includes the default
+    # C=1 and keeps the search at three candidates.
+    Cs = [0.1, 1, 10]
+    param_grid = [{'estimator__C': Cs}]
 
     estimator = svm.SVC(kernel="linear")
     selector = RFECV(estimator, step=1, cv=4, verbose=0)
     clf = GridSearchCV(selector, param_grid, cv=5, verbose=1)
     clf.fit(X_train, y_train)
 
-    print(clf.best_estimator_)
-    print('------------------------------')
-    print(clf.best_estimator_.n_features_)
-    print('------------------------------')
-    print(clf.best_estimator_.ranking_)
-    print('------------------------------')
-    print(clf.best_estimator_.support_)
-    print('------------------------------')
-    print(clf.best_estimator_.grid_scores_)
-
-    return clf.best_estimator_.estimator_
+    # NOTE(review): best_estimator_ is the fitted RFECV; per scikit-learn its
+    # predict() reduces X to the selected features itself, so callers should
+    # presumably pass full-width X -- confirm against train_model.py.
+    return (clf.best_estimator_, clf.best_estimator_.support_)
 
 
 def get_trained_model(choice, X_train, y_train):
--- a/test_rfe.py
+++ b/test_rfe.py
@@ -1,13 +0,0 @@
 
				-from sklearn.datasets import make_friedman1
			
 
				-from sklearn.feature_selection import RFECV
			
 
				-from sklearn.model_selection import GridSearchCV
			
 
				-from sklearn.svm import SVR
			
 
				-X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
			
 
				-param_grid = [{'estimator__C': [0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]}]
			
 
				-estimator = SVR(kernel="linear")
			
 
				-selector = RFECV(estimator, step=1, cv=4)
			
 
				-clf = GridSearchCV(selector, param_grid, cv=7)
			
 
				-clf.fit(X, y)
			
 
				-print(clf.best_estimator_.estimator_)
			
 
				-print(clf.best_estimator_.grid_scores_)
			
 
				-print(clf.best_estimator_.ranking_)
			
--- a/train_model.py
+++ b/train_model.py
@@ -88,7 +88,11 @@ def main():
 
				 
			
 
				     print("-------------------------------------------")
			
 
				     print("Train dataset size: ", final_df_train_size)
			
 
				-    model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
			
 
				+    if p_choice == 'rfe_svm_model': 
			
 
				+        model, indices = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
			
 
				+        selected_indices = [(i+1) for i in np.arange(len(indices)) if indices[i] == True]
			
 
				+    else:
			
 
				+        model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
			
 
				 
			
 
				     #######################
			
 
				     # 3. Fit model : use of cross validation to fit model
			
@@ -112,6 +116,10 @@ def main():
 
				 
			
 
				     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
			
 
				 
			
 
				+    if p_choice == 'rfe_svm_model': 
			
 
				+        X_test = X_test.loc[:, selected_indices]
			
 
				+        X_val = X_val.loc[:, selected_indices]
			
 
				+
			
 
				     y_test_model = model.predict(X_test)
			
 
				     y_val_model = model.predict(X_val)