import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
# Path of the CSV to load (hoteis.csv). The raw-string prefix keeps every
# backslash literal instead of treating it as an escape sequence.
url = r'\ALURA\05_Data_Science\raw\hoteis.csv'
df = pd.read_csv(url)

# Preview the first rows. A bare `df.head()` expression is only rendered by a
# notebook; when this file runs as a script the result is silently discarded,
# so print it explicitly.
print(df.head())

# One row of scatter plots with 'Preco' on the y axis and each of the three
# listed columns on the x axis. The trailing semicolon of the notebook
# version (used there to suppress cell output) is not needed in a script.
sns.pairplot(
    data=df,
    y_vars='Preco',
    x_vars=['Estrelas', 'ProximidadeTurismo', 'Capacidade'],
    kind='scatter',
    diag_kind='hist',
)
# Response variable: the 'Preco' column. Explanatory variables: every other
# column of the frame.
y = df['Preco']
X = df.drop('Preco', axis='columns')

# Hold out 35% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    random_state=10,
)
import statsmodels.api as sm

# Fit four OLS regressions of y_train on different subsets of X_train.
# `None` stands for "use every column of X_train"; the other entries name the
# columns to keep. sm.add_constant supplies the intercept column, which
# sm.OLS does not add by itself.
modelo_1, modelo_2, modelo_3, modelo_4 = (
    sm.OLS(
        y_train,
        sm.add_constant(X_train if _cols is None else X_train[_cols]),
    ).fit()
    for _cols in (
        None,
        ['Estrelas'],
        ['Estrelas', 'Capacidade'],
        ['Estrelas', 'ProximidadeTurismo'],
    )
)

# Print each model's regression summary, prefixed by its label, in order.
for _label, _fitted in (
    ("Modelo 1:", modelo_1),
    ("Modelo 2:", modelo_2),
    ("Modelo 3:", modelo_3),
    ("Modelo 4:", modelo_4),
):
    print(_label, _fitted.summary())