import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
# Source CSV for the Pima Indians diabetes data. Because `names=` is passed to
# read_csv below, pandas treats every row of the file as data (no header row).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
colunas = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
df = pd.read_csv(url, names=colunas)

# 'Outcome' is the label column; every remaining column is used as a feature.
y = df['Outcome']
X = df.drop(columns='Outcome')

# Hold out 30% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Base estimator; the seed makes tree construction deterministic.
tree = DecisionTreeClassifier(random_state=42)

# Hyper-parameter candidates explored exhaustively by the grid search
# (4 depths x 3 split thresholds x 2 criteria).
param_grid = dict(
    max_depth=[3, 5, 7, 10],
    min_samples_split=[2, 5, 10],
    criterion=['gini', 'entropy'],
)

# 5-fold cross-validation on the training portion only, ranked by accuracy.
grid_search = GridSearchCV(tree, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print("Melhores parâmetros:", grid_search.best_params_, sep="\n")
print("\nMelhor score:", grid_search.best_score_, sep="\n")

# Evaluate the best estimator found by the search on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("\nAcurácia no conjunto de teste:", accuracy_score(y_test, y_pred), sep="\n")
print("\nRelatório:", classification_report(y_test, y_pred), sep="\n")
import pandas as pd
# Importance of each feature as reported by the fitted best tree, in the same
# order as the columns of X.
importancias = best_model.feature_importances_

# Pair each feature name with its importance and list the most important first.
resultado = pd.DataFrame(
    {'Feature': X.columns, 'Importancia': importancias}
).sort_values(by='Importancia', ascending=False)

print(resultado)
import matplotlib.pyplot as plt
# Bar chart of the importance table: one bar per feature, in the table's
# current (descending-importance) row order.
resultado.plot.bar(x='Feature', y='Importancia')

# Labels are applied through pyplot to the axes created by the call above.
plt.title('Importância das Variáveis')
plt.xlabel('Características')
plt.ylabel('Importância')

# Avoid clipping the rotated feature names, then display the figure.
plt.tight_layout()
plt.show()
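# ---------------------------------------------------------------------------
# Output recorded from a run of this script (kept for reference).
# ---------------------------------------------------------------------------
# Melhores parâmetros:
# {'criterion': 'gini', 'max_depth': 5, 'min_samples_split': 10}
#
# Melhor score:
# 0.7522845275181724
#
# Acurácia no conjunto de teste:
# 0.7532467532467533
#
# Relatório:
#               precision    recall  f1-score   support
#
#            0       0.78      0.86      0.82       151
#            1       0.68      0.55      0.61        80
#
#     accuracy                           0.75       231
#    macro avg       0.73      0.71      0.71       231
# weighted avg       0.75      0.75      0.75       231
#
#                     Feature  Importancia
# 1                   Glucose     0.551588
# 5                       BMI     0.198030
# 7                       Age     0.170566
# 6  DiabetesPedigreeFunction     0.046163
# 2             BloodPressure     0.033652
# 0               Pregnancies     0.000000
# 3             SkinThickness     0.000000
# 4                   Insulin     0.000000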