models.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. # models imports
  2. from sklearn.model_selection import GridSearchCV
  3. from sklearn.linear_model import LogisticRegression
  4. from sklearn.ensemble import RandomForestClassifier, VotingClassifier
  5. from sklearn.neighbors import KNeighborsClassifier
  6. from sklearn.ensemble import GradientBoostingClassifier
  7. from sklearn.feature_selection import RFECV
  8. import sklearn.svm as svm
  def _get_best_model(X_train, y_train):
      """Grid-search an RBF-kernel SVC and return the best estimator found.

      Every combination of the ``C`` and ``gamma`` candidates listed below is
      evaluated with 5-fold cross-validation scored on accuracy. After the
      search has been fitted on ``(X_train, y_train)``, its
      ``best_estimator_`` attribute is returned.
      """
      # Alternative candidate grids noted in the original code (not active):
      #   C:     0.001, 0.01, 0.1, 1, 10, 100, 1000
      #   gamma: 0.001, 0.01, 0.1, 1, 5, 10, 100
      search_space = {
          'kernel': ['rbf'],
          'C': [1, 2, 4, 8, 16, 32],
          'gamma': [0.001, 0.1, 1, 10, 100],
      }
      search = GridSearchCV(
          svm.SVC(probability=True),
          search_space,
          cv=5,
          scoring='accuracy',
          verbose=2,
      )
      search.fit(X_train, y_train)
      return search.best_estimator_
  def svm_model(X_train, y_train):
      """Return the grid-searched SVM classifier for the given training data.

      Public wrapper that simply delegates to ``_get_best_model``.
      """
      best_svc = _get_best_model(X_train, y_train)
      return best_svc
  def ensemble_model(X_train, y_train):
      """Fit and return a three-member soft-voting ensemble.

      The members, all weighted equally, are:

      * ``'svm'`` -- the estimator returned by ``_get_best_model``
      * ``'lr'``  -- a liblinear, one-vs-rest ``LogisticRegression``
      * ``'rf'``  -- a ``RandomForestClassifier`` with 100 trees
      """
      # The list is evaluated top to bottom, so the SVM grid search runs first.
      members = [
          ('svm', _get_best_model(X_train, y_train)),
          ('lr', LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)),
          ('rf', RandomForestClassifier(n_estimators=100, random_state=1)),
      ]
      voter = VotingClassifier(estimators=members, voting='soft', weights=[1, 1, 1])
      voter.fit(X_train, y_train)
      return voter
  def ensemble_model_v2(X_train, y_train):
      """Fit and return a five-member soft-voting ensemble.

      The members, all weighted equally and registered in this order, are:

      * ``'lr'``  -- a liblinear, one-vs-rest ``LogisticRegression``
      * ``'knc'`` -- a ``KNeighborsClassifier`` with 2 neighbours
      * ``'gbc'`` -- a ``GradientBoostingClassifier`` (100 depth-1 stages,
        learning rate 1.0)
      * ``'svm'`` -- the estimator returned by ``_get_best_model``
      * ``'rf'``  -- a ``RandomForestClassifier`` with 100 trees
      """
      # Run the SVM grid search before anything else is constructed.
      tuned_svc = _get_best_model(X_train, y_train)

      members = [
          ('lr', LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)),
          ('knc', KNeighborsClassifier(n_neighbors=2)),
          ('gbc', GradientBoostingClassifier(
              n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)),
          ('svm', tuned_svc),
          ('rf', RandomForestClassifier(n_estimators=100, random_state=1)),
      ]
      voter = VotingClassifier(
          estimators=members,
          voting='soft',
          weights=[1] * len(members),  # equal weight for each of the five members
      )
      voter.fit(X_train, y_train)
      return voter
  def get_trained_model(choice, X_train, y_train):
      """Train and return the model selected by ``choice``.

      Args:
          choice: Name of the builder to run. Must be one of ``'svm_model'``,
              ``'ensemble_model'`` or ``'ensemble_model_v2'``.
          X_train: Forwarded unchanged to the selected builder.
          y_train: Forwarded unchanged to the selected builder.

      Returns:
          Whatever the selected builder function returns.

      Raises:
          ValueError: If ``choice`` is not one of the recognised names.
              Previously an unknown name fell through and returned ``None``,
              which only surfaced later when the caller tried to use the
              result as a model.
      """
      if choice == 'svm_model':
          return svm_model(X_train, y_train)
      if choice == 'ensemble_model':
          return ensemble_model(X_train, y_train)
      if choice == 'ensemble_model_v2':
          return ensemble_model_v2(X_train, y_train)
      # Fail loudly instead of handing back None for a mistyped model name.
      raise ValueError(
          "Unknown model choice %r; expected one of 'svm_model', "
          "'ensemble_model', 'ensemble_model_v2'" % (choice,)
      )