123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- # models imports
- from sklearn.model_selection import GridSearchCV
- from sklearn.linear_model import LogisticRegression
- from sklearn.ensemble import RandomForestClassifier, VotingClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.ensemble import GradientBoostingClassifier
- from sklearn.feature_selection import RFECV
- import sklearn.svm as svm
- from sklearn.metrics import accuracy_score
- from thundersvm import SVC
# variables and parameters
# Running count of scorer invocations in this process; only used to label the
# progress line printed by my_accuracy_scorer.
n_predict = 0


def my_accuracy_scorer(*args):
    """Compute accuracy, print it with a running call index, and return it.

    Two calling conventions are accepted:

    * ``my_accuracy_scorer(y_true, y_pred, ...)`` -- the arguments are
      forwarded unchanged to ``accuracy_score``.
    * ``my_accuracy_scorer(estimator, X, y_true)`` -- the form in which
      ``GridSearchCV`` invokes a callable passed as ``scoring``; predictions
      are first obtained from ``estimator.predict(X)``.

    Side effects:
        Prints ``'<n_predict> - Score is <score>'`` and increments the
        module-level ``n_predict`` counter.

    Returns:
        The value returned by ``accuracy_score``.
    """
    global n_predict
    # A callable given to GridSearchCV(scoring=...) receives (estimator, X, y),
    # not (y_true, y_pred); forwarding those three positionally to
    # accuracy_score is not a valid call, so predict first in that case.
    if len(args) == 3 and hasattr(args[0], 'predict'):
        estimator, X, y_true = args
        score = accuracy_score(y_true, estimator.predict(X))
    else:
        score = accuracy_score(*args)
    print('{0} - Score is {1}'.format(n_predict, score))
    n_predict += 1
    return score
def _get_best_model(X_train, y_train):
    """Grid-search an RBF-kernel SVC and return the best estimator found.

    Every combination of the C and gamma values listed below is evaluated
    by ``GridSearchCV`` with 5 folds, scored through ``my_accuracy_scorer``
    and run with ``n_jobs=-1``.

    Args:
        X_train: training features passed to ``GridSearchCV.fit``.
        y_train: training labels passed to ``GridSearchCV.fit``.

    Returns:
        The search's ``best_estimator_``.
    """
    search_space = dict(
        kernel=['rbf'],
        C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
        gamma=[0.001, 0.01, 0.1, 5, 10, 100],
    )
    base_svc = svm.SVC(probability=True, class_weight='balanced')
    search = GridSearchCV(
        base_svc,
        search_space,
        cv=5,
        verbose=1,
        scoring=my_accuracy_scorer,
        n_jobs=-1,
    )
    search.fit(X_train, y_train)
    return search.best_estimator_
def svm_model(X_train, y_train):
    """Return the SVC selected by ``_get_best_model`` for the given data."""
    best_svc = _get_best_model(X_train, y_train)
    return best_svc
def _get_best_gpu_model(X_train, y_train):
    """Fit a thundersvm RBF ``SVC`` per (C, gamma) pair and keep the best one.

    Each candidate is fitted on the full training data and scored with
    ``svc.score`` on that same data. The highest-scoring fitted model is
    returned; on ties the earliest candidate in iteration order (C outer,
    gamma inner) wins.

    Args:
        X_train: training features passed to ``SVC.fit`` / ``SVC.score``.
        y_train: training labels passed to ``SVC.fit`` / ``SVC.score``.

    Returns:
        The fitted ``SVC`` with the highest score. A model is always
        returned, even if every candidate scores 0.
    """
    from itertools import product

    Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    gammas = [0.001, 0.01, 0.1, 5, 10, 100]
    best_model = None
    best_score = 0.
    for c, g in product(Cs, gammas):
        print('C:', c, ', gamma:', g)
        svc = SVC(probability=True, class_weight='balanced', kernel='rbf', gamma=g, C=c)
        svc.fit(X_train, y_train)
        # NOTE(review): candidates are ranked by accuracy on the data they
        # were trained on, which tends to favour the most overfit setting;
        # consider scoring on held-out data instead -- TODO confirm intent.
        score = svc.score(X_train, y_train)
        # Accept the first candidate unconditionally so the function never
        # returns None when no score exceeds the initial 0.
        if best_model is None or score > best_score:
            best_score = score
            best_model = svc
    return best_model
def svm_gpu(X_train, y_train):
    """Return the thundersvm SVC selected by ``_get_best_gpu_model``."""
    best_gpu_svc = _get_best_gpu_model(X_train, y_train)
    return best_gpu_svc
def ensemble_model(X_train, y_train):
    """Build and fit a soft-voting ensemble of SVM, logistic regression and RF.

    The SVM member is the estimator returned by ``_get_best_model``; all
    three members carry equal weight in the ``VotingClassifier``.

    Args:
        X_train: training features.
        y_train: training labels.

    Returns:
        The fitted ``VotingClassifier``.
    """
    members = [
        ('svm', _get_best_model(X_train, y_train)),
        ('lr', LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)),
        ('rf', RandomForestClassifier(n_estimators=100, random_state=1)),
    ]
    voter = VotingClassifier(estimators=members, voting='soft', weights=[1, 1, 1])
    voter.fit(X_train, y_train)
    return voter
def ensemble_model_v2(X_train, y_train):
    """Build and fit a five-member soft-voting ensemble.

    Members, in voting order: logistic regression, k-nearest neighbours,
    gradient boosting, the SVM returned by ``_get_best_model``, and a random
    forest. All members carry equal weight.

    Args:
        X_train: training features.
        y_train: training labels.

    Returns:
        The fitted ``VotingClassifier``.
    """
    # Run the SVM grid search first, as the remaining members need no search.
    tuned_svc = _get_best_model(X_train, y_train)
    members = [
        ('lr', LogisticRegression(solver='liblinear', multi_class='ovr', random_state=1)),
        ('knc', KNeighborsClassifier(n_neighbors=2)),
        ('gbc', GradientBoostingClassifier(
            n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)),
        ('svm', tuned_svc),
        ('rf', RandomForestClassifier(n_estimators=100, random_state=1)),
    ]
    voter = VotingClassifier(
        estimators=members,
        voting='soft',
        weights=[1, 1, 1, 1, 1],
    )
    voter.fit(X_train, y_train)
    return voter
def get_trained_model(choice, X_train, y_train):
    """Train and return the model identified by ``choice``.

    Args:
        choice: one of ``'svm_model'``, ``'svm_gpu'``, ``'ensemble_model'``
            or ``'ensemble_model_v2'``.
        X_train: training features forwarded to the selected builder.
        y_train: training labels forwarded to the selected builder.

    Returns:
        Whatever the selected builder returns, or ``None`` when ``choice``
        matches none of the names above.
    """
    known_choices = ('svm_model', 'svm_gpu', 'ensemble_model', 'ensemble_model_v2')
    if choice not in known_choices:
        return None
    builders = {
        'svm_model': svm_model,
        'svm_gpu': svm_gpu,
        'ensemble_model': ensemble_model,
        'ensemble_model_v2': ensemble_model_v2,
    }
    return builders[choice](X_train, y_train)
|