Olá,
Usando a mesma base de dados da aula, usei os classificadores RandomForest, SGD e SVC para fazer a classificação; depois, tracei a confusion matrix de cada um, encontrei as probabilidades, calculei o roc auc score e tracei as roc curves dos classificadores.
Agora tenho o false alarm rate, o hit rate e o threshold. Consegui utilizar o RocCurveDisplay do sklearn, mas gostaria de fazer a plotagem completa usando o seaborn, que permite maior customização.
Enfim: gostaria de saber como plotar a roc curve usando o seaborn, discriminando as probabilidades (com detalhes, para visualização), thresholds e os false alarm rates x hit rates dos modelos no mesmo gráfico.
Muito obrigado!
Esse é o código:
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, RocCurveDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
import seaborn as sns
from numpy import random
# Reproducibility: seeding numpy's global RNG also drives every sklearn
# estimator below that is created without an explicit random_state.
RANDOM = 123
random.seed(RANDOM)

# Load the churn dataset; raise the column display limit so head() shows
# every feature of the wide frame.
raw_data = pd.read_csv('C:/Users/Laryssa/Documents/designpatterns_tres/data_science_aplicada/Customer-Churn.csv')
pd.set_option('display.max_columns', 39)
print(raw_data.head())

# Map the target to binary. NOTE(review): assumes the CSV stores the label
# without an accent ('Nao', not 'Não') — confirm against the data; any
# unmapped string passes through replace() unchanged and would leave
# non-numeric labels in y.
dict_churn = {'Sim': 1,
              'Nao': 0}

# Features = everything except the target. `columns=` already selects the
# column axis, so the previous redundant `axis=1` argument (which pandas
# silently ignores alongside `columns=`) is dropped.
x = raw_data.drop(columns='Churn')
y = raw_data['Churn'].replace(dict_churn)

# One-hot encode the categorical features, then standardize every column.
x_dummies = pd.get_dummies(x)
print(x_dummies)
standard_scaler = StandardScaler()
x_normalized = standard_scaler.fit_transform(x_dummies)
x_data = pd.DataFrame(x_normalized, columns=x_dummies.columns)
print(x_data)

# 70/30 hold-out split; no random_state is passed, so the split draws from
# the globally seeded numpy RNG above.
x_train, x_test, y_train, y_test = train_test_split(x_data, y, test_size=0.3)
def _fit_and_predict(model):
    """Fit *model* on the training split and return (model, test-set predictions)."""
    model.fit(x_train, y_train)
    return model, model.predict(x_test)

# Random Forest
rf_classifier, y_pred_rf = _fit_and_predict(RandomForestClassifier())

# SGD Classifier — wrapped in CalibratedClassifierCV so that
# predict_proba is available for the ROC analysis further down.
sgd_classifier_without_calibration = SGDClassifier()
sgd_classifier, y_pred_sgd = _fit_and_predict(
    CalibratedClassifierCV(sgd_classifier_without_calibration))

# SVC with an RBF kernel (probability=True enables predict_proba)
svc_classifier_rbf, y_pred_svc_rbf = _fit_and_predict(
    SVC(kernel='rbf', probability=True))

# SVC with a sigmoid kernel
svc_classifier_sigmoid, y_pred_svc_sigmoid = _fit_and_predict(
    SVC(kernel='sigmoid', probability=True))
# Confusion matrix of each model's hard (0/1) test-set predictions.
matriz_confusao_rf = confusion_matrix(y_test, y_pred_rf)
matriz_confusao_sgd = confusion_matrix(y_test, y_pred_sgd)
matriz_confusao_svc_rbf = confusion_matrix(y_test, y_pred_svc_rbf)
matriz_confusao_svc_sigmoid = confusion_matrix(y_test, y_pred_svc_sigmoid)

# No-skill baseline: a constant score of 0 for every sample (yields AUC 0.5).
# Replaces the previous list-comprehension plus a no-op `r_probs[:]` copy.
r_probs = [0] * len(y_test)

# Positive-class probability (column 1 of predict_proba) for each model,
# taken in a single step instead of assign-then-slice.
sgd_probs = sgd_classifier.predict_proba(x_test)[:, 1]
rf_probs = rf_classifier.predict_proba(x_test)[:, 1]
svc_rbf_probs = svc_classifier_rbf.predict_proba(x_test)[:, 1]
svc_sigmoid_probs = svc_classifier_sigmoid.predict_proba(x_test)[:, 1]
# ROC-AUC for the no-skill baseline and each classifier.
auc_r = roc_auc_score(y_test, r_probs)
auc_sgd = roc_auc_score(y_test, sgd_probs)
auc_rf = roc_auc_score(y_test, rf_probs)
auc_svc_rbf = roc_auc_score(y_test, svc_rbf_probs)
auc_svc_sigmoid = roc_auc_score(y_test, svc_sigmoid_probs)

# ROC points: false-positive rate (false alarm rate), true-positive rate
# (hit rate) and the decision thresholds that produce each point.
# NOTE(review): the 'scv' identifiers below are misspellings of 'svc'; they
# are kept unchanged in case code elsewhere references these module-level
# names — consider renaming across the whole file.
fp_r, tp_r, threshold_r = roc_curve(y_test, r_probs)
fp_rf, tp_rf, threshold_rf = roc_curve(y_test, rf_probs)
fp_sgd, tp_sgd, threshold_sgd = roc_curve(y_test, sgd_probs)
fp_scv_sigmoid, tp_scv_sigmoid, threshold_scv_sigmoid = roc_curve(y_test, svc_sigmoid_probs)
fp_scv_rbf, tp_scv_rbf, threshold_scv_rbf = roc_curve(y_test, svc_rbf_probs)

print("r:", auc_r)
print("SGD:", auc_sgd)
print("RF:", auc_rf)
print("SVC Sigmoid:", auc_svc_sigmoid)
print("SVC RBF:", auc_svc_rbf)

# Plot the Random Forest ROC curve. The legend label now names the actual
# model instead of the 'example estimator' placeholder copied from the
# sklearn documentation example.
display = RocCurveDisplay(fpr=fp_rf, tpr=tp_rf, roc_auc=auc_rf,
                          estimator_name='Random Forest')
display.plot()

print(fp_r, tp_r, threshold_r)
print(fp_rf, tp_rf, threshold_rf)