# models.py
  1. # models imports
  2. import numpy as np
  3. from sklearn.model_selection import GridSearchCV
  4. from sklearn.linear_model import LogisticRegression
  5. from sklearn.ensemble import RandomForestClassifier, VotingClassifier
  6. from sklearn.neighbors import KNeighborsClassifier
  7. from sklearn.ensemble import GradientBoostingClassifier
  8. from sklearn.feature_selection import RFECV
  9. import sklearn.svm as svm
  10. from sklearn.metrics import accuracy_score
  11. from thundersvm import SVC
  12. from sklearn.model_selection import KFold, cross_val_score
# variables and parameters

# Counter of scorer invocations. In this file it is only referenced by the
# disabled `my_accuracy_scorer` helper below.
n_predict = 0

# Disabled debugging scorer: computes accuracy_score, prints it together with
# the running evaluation index, then increments the counter.
# def my_accuracy_scorer(*args):
#     global n_predict
#     score = accuracy_score(*args)
#     print('{0} - Score is {1}'.format(n_predict, score))
#     n_predict += 1
#     return score
  21. def _get_best_model(X_train, y_train):
  22. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  23. gammas = [0.001, 0.01, 0.1, 5, 10, 100]
  24. param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
  25. svc = svm.SVC(probability=True, class_weight='balanced')
  26. #clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
  27. clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, n_jobs=-1)
  28. clf.fit(X_train, y_train)
  29. model = clf.best_estimator_
  30. return model
  31. def svm_model(X_train, y_train):
  32. return _get_best_model(X_train, y_train)
  33. def _get_best_gpu_model(X_train, y_train):
  34. # Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  35. # gammas = [0.001, 0.01, 0.1, 5, 10, 100]
  36. # param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
  37. # svc = SVC(probability=True, class_weight='balanced')
  38. # clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
  39. # clf.fit(X_train, y_train)
  40. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  41. gammas = [0.001, 0.01, 0.1, 5, 10, 100]
  42. bestModel = None
  43. bestScore = 0.
  44. n_eval = 1
  45. k_fold = KFold(n_splits=5)
  46. for c in Cs:
  47. for g in gammas:
  48. svc = SVC(probability=True, class_weight='balanced', kernel='rbf', gamma=g, C=c)
  49. svc.fit(X_train, y_train)
  50. score = cross_val_score(svc, X_train, y_train, cv=k_fold, n_jobs=-1)
  51. score = np.mean(score)
  52. # keep track of best model
  53. if score > bestScore:
  54. bestScore = score
  55. bestModel = svc
  56. print('Eval n° {} [C: {}, gamma: {}] => [score: {}, bestScore: {}]'.format(n_eval, c, g, score, bestScore))
  57. n_eval += 1
  58. return bestModel
  59. def svm_gpu(X_train, y_train):
  60. return _get_best_gpu_model(X_train, y_train)
  61. def ensemble_model(X_train, y_train):
  62. svm_model = _get_best_model(X_train, y_train)
  63. lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
  64. rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
  65. ensemble_model = VotingClassifier(estimators=[
  66. ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)], voting='soft', weights=[1,1,1])
  67. ensemble_model.fit(X_train, y_train)
  68. return ensemble_model
  69. def ensemble_model_v2(X_train, y_train):
  70. svm_model = _get_best_model(X_train, y_train)
  71. knc_model = KNeighborsClassifier(n_neighbors=2)
  72. gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
  73. lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
  74. rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
  75. ensemble_model = VotingClassifier(estimators=[
  76. ('lr', lr_model),
  77. ('knc', knc_model),
  78. ('gbc', gbc_model),
  79. ('svm', svm_model),
  80. ('rf', rf_model)],
  81. voting='soft', weights=[1, 1, 1, 1, 1])
  82. ensemble_model.fit(X_train, y_train)
  83. return ensemble_model
  84. def get_trained_model(choice, X_train, y_train):
  85. if choice == 'svm_model':
  86. return svm_model(X_train, y_train)
  87. if choice == 'svm_gpu':
  88. return svm_gpu(X_train, y_train)
  89. if choice == 'ensemble_model':
  90. return ensemble_model(X_train, y_train)
  91. if choice == 'ensemble_model_v2':
  92. return ensemble_model_v2(X_train, y_train)