from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California housing dataset (features + median house value target).
california = fetch_california_housing()
X = california.data
y = california.target

# Split the data into train and test sets (70/30, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid to be searched exhaustively.
param_grid = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the model; fix random_state so tie-breaking inside the tree is
# deterministic and the grid-search result is reproducible across runs.
model = DecisionTreeRegressor(random_state=42)

# Grid search with 5-fold cross-validation, scored by negative mean squared error.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Report the best hyperparameter combination found.
print(f"Melhores parâmetros: {grid_search.best_params_}")
import pandas as pd
from sklearn.datasets import load_diabetes

# Load the diabetes dataset (10 baseline features, disease-progression target).
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Split the data into train and test sets (70/30, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# Train the model; fix random_state so the computed importances are reproducible.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Extract the impurity-based feature importances learned by the forest.
importances = model.feature_importances_
feature_names = diabetes.feature_names

# Sort features by importance so the bar chart reads smallest-to-largest.
order = importances.argsort()
sorted_names = [feature_names[i] for i in order]
sorted_importances = importances[order]

# Horizontal bar chart of feature importances.
plt.figure(figsize=(10, 6))
plt.barh(sorted_names, sorted_importances)
plt.title("Importância das Features")
plt.xlabel("Importância")
plt.ylabel("Feature")
plt.tight_layout()  # avoid clipped axis labels
plt.show()