models.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. # models imports
  2. from sklearn.model_selection import GridSearchCV
  3. from sklearn.linear_model import LogisticRegression
  4. from sklearn.ensemble import RandomForestClassifier, VotingClassifier
  5. from sklearn.neighbors import KNeighborsClassifier
  6. from sklearn.ensemble import GradientBoostingClassifier
  7. from sklearn.feature_selection import RFECV
  8. import sklearn.svm as svm
  9. from sklearn.metrics import accuracy_score
  10. from thundersvm import SVC
  11. # variables and parameters
  12. n_predict = 0
  13. def my_accuracy_scorer(*args):
  14. global n_predict
  15. score = accuracy_score(*args)
  16. print('{0} - Score is {1}'.format(n_predict, score))
  17. n_predict += 1
  18. return score
  19. def _get_best_model(X_train, y_train):
  20. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  21. gammas = [0.001, 0.01, 0.1, 5, 10, 100]
  22. param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
  23. svc = svm.SVC(probability=True, class_weight='balanced')
  24. clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
  25. clf.fit(X_train, y_train)
  26. model = clf.best_estimator_
  27. return model
  28. def svm_model(X_train, y_train):
  29. return _get_best_model(X_train, y_train)
  30. def _get_best_gpu_model(X_train, y_train):
  31. # Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  32. # gammas = [0.001, 0.01, 0.1, 5, 10, 100]
  33. # param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
  34. # svc = SVC(probability=True, class_weight='balanced')
  35. # clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
  36. # clf.fit(X_train, y_train)
  37. Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  38. gammas = [0.001, 0.01, 0.1, 5, 10, 100]
  39. bestModel = None
  40. modelScore = 0.
  41. for c in Cs:
  42. for g in gammas:
  43. print('C:', c, ', gamma:', g)
  44. svc = SVC(probability=True, class_weight='balanced', kernel='rbf', gamma=g, C=c)
  45. svc.fit(X_train, y_train)
  46. score = svc.score(X_train, y_train)
  47. if score > modelScore:
  48. modelScore = score
  49. bestModel = svc
  50. return bestModel
  51. def svm_gpu(X_train, y_train):
  52. return _get_best_gpu_model(X_train, y_train)
  53. def ensemble_model(X_train, y_train):
  54. svm_model = _get_best_model(X_train, y_train)
  55. lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
  56. rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
  57. ensemble_model = VotingClassifier(estimators=[
  58. ('svm', svm_model), ('lr', lr_model), ('rf', rf_model)], voting='soft', weights=[1,1,1])
  59. ensemble_model.fit(X_train, y_train)
  60. return ensemble_model
  61. def ensemble_model_v2(X_train, y_train):
  62. svm_model = _get_best_model(X_train, y_train)
  63. knc_model = KNeighborsClassifier(n_neighbors=2)
  64. gbc_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
  65. lr_model = LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)
  66. rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
  67. ensemble_model = VotingClassifier(estimators=[
  68. ('lr', lr_model),
  69. ('knc', knc_model),
  70. ('gbc', gbc_model),
  71. ('svm', svm_model),
  72. ('rf', rf_model)],
  73. voting='soft', weights=[1, 1, 1, 1, 1])
  74. ensemble_model.fit(X_train, y_train)
  75. return ensemble_model
  76. def get_trained_model(choice, X_train, y_train):
  77. if choice == 'svm_model':
  78. return svm_model(X_train, y_train)
  79. if choice == 'svm_gpu':
  80. return svm_gpu(X_train, y_train)
  81. if choice == 'ensemble_model':
  82. return ensemble_model(X_train, y_train)
  83. if choice == 'ensemble_model_v2':
  84. return ensemble_model_v2(X_train, y_train)