from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
# --- Decision-tree classification on the California housing data ---
# Load the dataset as pandas objects (as_frame=True).
housing = fetch_california_housing(as_frame=True)
X = housing.data

# Binary classification target: 1 when the value is above the mean of the
# full target column, 0 otherwise.
target_mean = housing.target.mean()
y = (housing.target > target_mean).astype(int)

# Hold out 30% of the rows for the final evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Hyper-parameter candidates explored by the grid search.
param_grid = dict(
    max_depth=[3, 5, 10, None],
    min_samples_split=[2, 5, 10],
    criterion=['gini', 'entropy'],
)

# 5-fold cross-validated search over the grid, fitted on the training split only.
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)
print("Melhores parâmetros:", grid_search.best_params_)

# Score the best estimator found by the search on the held-out test split.
best_tree = grid_search.best_estimator_
y_pred = best_tree.predict(X_test)
print("Acurácia no teste:", accuracy_score(y_test, y_pred))
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SelectKBest, f_regression
# --- Feature scoring on the diabetes data with SelectKBest / f_regression ---
diabetes = load_diabetes()
X = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
y = diabetes.target

# Fit the selector (k=5) and keep the transformed feature matrix.
selector = SelectKBest(f_regression, k=5)
X_new = selector.fit_transform(X, y)

# Pair each column of X with its fitted score and sort from highest to lowest.
# NOTE: despite the name, this table lists every column of X, not only the
# k columns retained in X_new.
score_table = pd.DataFrame({
    'Feature': X.columns,
    'Score': selector.scores_,
})
selected_features = score_table.sort_values('Score', ascending=False)
print(selected_features.head(10))