models.py

# models imports
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import RFECV
from sklearn import svm
def _get_best_model(X_train, y_train):
    """Grid-search an RBF SVM over C and gamma and return the best fitted estimator."""
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gammas = [0.001, 0.01, 0.1, 1, 5, 10, 100]
    param_grid = {'kernel': ['rbf'], 'C': Cs, 'gamma': gammas}
    svc = svm.SVC(probability=True)  # probability=True enables predict_proba for soft voting
    clf = GridSearchCV(svc, param_grid, cv=10, scoring='accuracy', verbose=0)
    clf.fit(X_train, y_train)
    return clf.best_estimator_
def svm_model(X_train, y_train):
    """Train a standalone grid-searched SVM classifier."""
    return _get_best_model(X_train, y_train)
def ensemble_model(X_train, y_train):
    """Soft-voting ensemble of the grid-searched SVM, logistic regression and random forest."""
    svm_clf = _get_best_model(X_train, y_train)
    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
    model = VotingClassifier(
        estimators=[('svm', svm_clf), ('lr', lr_model), ('rf', rf_model)],
        voting='soft', weights=[1, 1, 1])
    model.fit(X_train, y_train)
    return model
def ensemble_model_v2(X_train, y_train):
    """Larger soft-voting ensemble: logistic regression, k-NN, gradient boosting, SVM and random forest."""
    svm_clf = _get_best_model(X_train, y_train)
    knc_model = KNeighborsClassifier(n_neighbors=2)
    gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                           max_depth=1, random_state=0)
    lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
    rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
    model = VotingClassifier(
        estimators=[('lr', lr_model),
                    ('knc', knc_model),
                    ('gbc', gbc_model),
                    ('svm', svm_clf),
                    ('rf', rf_model)],
        voting='soft', weights=[1, 1, 1, 1, 1])
    model.fit(X_train, y_train)
    return model
def rfe_svm_model(X_train, y_train):
    """Recursive feature elimination (RFECV) around an SVM, tuned with a grid search.

    RFECV requires the inner estimator to expose coef_ or feature_importances_,
    so a linear-kernel SVC is used here (gamma does not apply to the linear kernel).
    """
    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    param_grid = {'estimator__kernel': ['linear'], 'estimator__C': Cs}
    svc = svm.SVC(probability=True)
    rfe_model = RFECV(svc, step=1, cv=10, verbose=0)
    clf = GridSearchCV(rfe_model, param_grid, cv=10, scoring='accuracy', verbose=1)
    clf.fit(X_train, y_train)
    best_rfe = clf.best_estimator_
    print(best_rfe)
    print('------------------------------')
    print(best_rfe.n_features_)
    print('------------------------------')
    print(best_rfe.ranking_)
    print('------------------------------')
    print(best_rfe.cv_results_['mean_test_score'])  # cv_results_ replaces the removed grid_scores_ attribute
    # Return the fitted RFECV so prediction handles feature selection transparently.
    return best_rfe
def get_trained_model(choice, X_train, y_train):
    """Dispatch to one of the model builders by name and return the fitted model."""
    if choice == 'svm_model':
        return svm_model(X_train, y_train)
    if choice == 'ensemble_model':
        return ensemble_model(X_train, y_train)
    if choice == 'ensemble_model_v2':
        return ensemble_model_v2(X_train, y_train)
    if choice == 'rfe_svm_model':
        return rfe_svm_model(X_train, y_train)
    raise ValueError('unknown model choice: {}'.format(choice))
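

# Illustrative usage sketch: trains one of the models on a synthetic
# binary-classification dataset. The dataset, split and chosen model name
# below are assumptions for demonstration only.
if __name__ == '__main__':
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score

    X, y = make_classification(n_samples=300, n_features=10, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    model = get_trained_model('svm_model', X_train, y_train)
    print('test accuracy:', accuracy_score(y_test, model.predict(X_test)))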