import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.preprocessing import OneHotEncoder
from sklearn.cluster import KMeans
# --- 1. Load the dataset -----------------------------------------------------
# The CSV is fetched straight from GitHub; read_csv is called without
# index_col, so `df` carries a default 0..n-1 row index.
url = 'https://raw.githubusercontent.com/alura-cursos/Clusterizacao-dados-sem-rotulo/main/Dados/dados_mkt.csv'
df = pd.read_csv(url)

# --- 2. One-hot encode the 'sexo' column -------------------------------------
# The category list is fixed up front rather than inferred from the data, so
# the encoded columns always come out in the order F, M, NE.
encoder = OneHotEncoder(
    categories=[['F', 'M', 'NE']],
    sparse_output=False,  # return a dense ndarray so it can feed a DataFrame
)
encoded = encoder.fit_transform(df[['sexo']])
encoded_df = pd.DataFrame(
    data=encoded,
    columns=encoder.get_feature_names_out(['sexo']),
)

# Put the indicator columns next to the original ones and discard the raw
# 'sexo' column. Both frames use the default row index, so rows line up.
dados = pd.concat([df, encoded_df], axis='columns').drop(columns='sexo')

# Persist the fitted encoder so the same encoding can be reapplied later.
joblib.dump(encoder, 'encoder.pkl')

# --- 3. Cluster --------------------------------------------------------------
# KMeans is fitted on every column of `dados`.
# NOTE(review): assumes all remaining columns are numeric -- confirm against
# the CSV schema.
modelo = KMeans(n_clusters=2, random_state=45).fit(dados)

# Record each row's assigned cluster label next to its features.
dados['cluster'] = modelo.labels_

print(dados.head(n=5))