models.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # models imports
  2. from sklearn.model_selection import GridSearchCV
  3. from sklearn.linear_model import LogisticRegression
  4. from sklearn.ensemble import RandomForestClassifier, VotingClassifier
  5. from sklearn.neighbors import KNeighborsClassifier
  6. from sklearn.ensemble import GradientBoostingClassifier
  7. from sklearn.feature_selection import RFECV, RFE
  8. from sklearn.metrics import roc_auc_score
  9. import sklearn.svm as svm
  10. def _roc_auc_scorer(estimator, X, y):
  11. y_pred = estimator.predict(X)
  12. return roc_auc_score(y, y_pred)
  13. def _get_best_model(X_train, y_train):
  14. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  15. gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
  16. param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
  17. svc = svm.SVC(probability=True)
  18. clf = GridSearchCV(svc, param_grid, cv=10, scoring=_roc_auc_scorer, verbose=0)
  19. clf.fit(X_train, y_train)
  20. model = clf.best_estimator_
  21. return model
  22. def svm_model(X_train, y_train):
  23. return _get_best_model(X_train, y_train)
  24. def rfecv_svm_model(X_train, y_train, n_components=1):
  25. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  26. gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
  27. param_grid = [{'estimator__C': Cs, 'estimator__gamma' : gammas}]
  28. estimator = svm.SVC(kernel="linear")
  29. selector = RFECV(estimator, step=1, cv=4, verbose=1)
  30. clf = GridSearchCV(selector, param_grid, cv=5, verbose=1, scoring=_roc_auc_scorer)
  31. clf.fit(X_train, y_train)
  32. return clf.best_estimator_
  33. def rfe_svm_model(X_train, y_train, n_components=1):
  34. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  35. gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
  36. param_grid = [{'estimator__C': Cs, 'estimator__gamma' : gammas}]
  37. estimator = svm.SVC(kernel="linear")
  38. selector = RFE(estimator, step=1, n_features_to_select=n_components, verbose=1)
  39. clf = GridSearchCV(selector, param_grid, cv=5, verbose=1, scoring=_roc_auc_scorer)
  40. clf.fit(X_train, y_train)
  41. return clf.best_estimator_
  42. def get_trained_model(choice, X_train, y_train, n_components=1):
  43. if choice == 'svm_model':
  44. return svm_model(X_train, y_train)
  45. if choice == 'rfe_svm_model':
  46. return rfe_svm_model(X_train, y_train, n_components)
  47. if choice == 'rfcecv_svm_model':
  48. return rfecv_svm_model(X_train, y_train, n_components)