|
@@ -4,14 +4,14 @@ from sklearn.linear_model import LogisticRegression
|
|
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
|
|
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
|
|
from sklearn.neighbors import KNeighborsClassifier
|
|
from sklearn.neighbors import KNeighborsClassifier
|
|
from sklearn.ensemble import GradientBoostingClassifier
|
|
from sklearn.ensemble import GradientBoostingClassifier
|
|
-from sklearn.feature_selection import RFECV
|
|
|
|
|
|
+from sklearn.feature_selection import RFECV, RFE
|
|
from sklearn.metrics import roc_auc_score
|
|
from sklearn.metrics import roc_auc_score
|
|
import sklearn.svm as svm
|
|
import sklearn.svm as svm
|
|
|
|
|
|
def _roc_auc_scorer(estimator, X, y):
    """Score a fitted binary classifier by ROC AUC on (X, y).

    Has the ``scorer(estimator, X, y)`` signature, so it can be passed as
    ``scoring=`` to scikit-learn model-selection utilities.

    The AUC is computed from continuous scores whenever the estimator
    exposes them. Feeding hard ``predict()`` labels to ``roc_auc_score``
    collapses the ROC curve to a single operating point, which makes
    differently-ranked models look identical during model selection.

    Args:
        estimator: A fitted classifier.
        X: Samples to score.
        y: True binary labels for ``X``.

    Returns:
        The value returned by ``roc_auc_score``.
    """
    if hasattr(estimator, "decision_function"):
        # Non-thresholded margins (e.g. SVC, or RFE wrapping an SVC).
        y_score = estimator.decision_function(X)
    elif hasattr(estimator, "predict_proba"):
        # Probability of the second class in ``classes_``.
        # NOTE(review): assumes a binary problem -- confirm against callers.
        y_score = estimator.predict_proba(X)[:, 1]
    else:
        # Last resort: hard labels, which is what the original always used.
        y_score = estimator.predict(X)

    return roc_auc_score(y, y_score)
|
|
|
|
|
|
def _get_best_model(X_train, y_train):
|
|
def _get_best_model(X_train, y_train):
|
|
@@ -33,7 +33,7 @@ def svm_model(X_train, y_train):
|
|
|
|
|
|
return _get_best_model(X_train, y_train)
|
|
return _get_best_model(X_train, y_train)
|
|
|
|
|
|
-def rfe_svm_model(X_train, y_train, n_components=1):
|
|
|
|
|
|
+def rfecv_svm_model(X_train, y_train, n_components=1):
|
|
|
|
|
|
Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
|
|
Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
|
|
gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
|
|
gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
|
|
@@ -47,10 +47,27 @@ def rfe_svm_model(X_train, y_train, n_components=1):
|
|
return clf.best_estimator_
|
|
return clf.best_estimator_
|
|
|
|
|
|
|
|
|
|
-def get_trained_model(choice, X_train, y_train):
|
|
|
|
|
|
def rfe_svm_model(X_train, y_train, n_components=1):
    """Grid-search a linear SVM wrapped in recursive feature elimination.

    An ``RFE`` selector around ``svm.SVC(kernel="linear")`` is tuned over
    the SVM's ``C`` with 5-fold cross-validation, scored by
    ``_roc_auc_scorer``.

    Args:
        X_train: Training samples.
        y_train: Training labels.
        n_components: Number of features RFE keeps
            (passed as ``n_features_to_select``).

    Returns:
        ``best_estimator_`` of the grid search, i.e. the RFE selector
        configured with the best-scoring ``C``.
    """
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    # Only C is searched: a linear kernel ignores gamma, so a gamma axis
    # would refit and rescore every C value repeatedly for identical results.
    param_grid = [{'estimator__C': Cs}]

    estimator = svm.SVC(kernel="linear")
    # step=1 removes one feature per elimination round.
    selector = RFE(estimator, step=1, n_features_to_select=n_components, verbose=1)
    clf = GridSearchCV(selector, param_grid, cv=5, verbose=1, scoring=_roc_auc_scorer)
    clf.fit(X_train, y_train)

    return clf.best_estimator_
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_trained_model(choice, X_train, y_train, n_components=1):
    """Train and return the model named by *choice*.

    Args:
        choice: One of ``'svm_model'``, ``'rfe_svm_model'`` or
            ``'rfecv_svm_model'``.
        X_train: Training samples.
        y_train: Training labels.
        n_components: Forwarded to the RFE-based builders; not used for
            ``'svm_model'``.

    Returns:
        Whatever the selected builder returns, or ``None`` when *choice*
        matches none of the known names.
    """
    if choice == 'svm_model':
        return svm_model(X_train, y_train)

    if choice == 'rfe_svm_model':
        return rfe_svm_model(X_train, y_train, n_components)

    # The key was originally misspelled 'rfcecv_svm_model'. The correct
    # spelling is accepted now; the old one stays for existing callers.
    if choice in ('rfecv_svm_model', 'rfcecv_svm_model'):
        return rfecv_svm_model(X_train, y_train, n_components)

    return None
|