Realizei todo o código, mas estou tendo dificuldades para acertar esta parte final. Podem me ajudar?
id do projeto = project-e3b69726-3313-4071-96d
caminho das pastas dentro do bucket = alura-fundamentos-nuvem/olist_eccomerce/olist_Ecomerce_arquivos
# --- Step 1: authenticate the Colab session, then read the JSON from GCS ---
from google.colab import auth

auth.authenticate_user()

from google.cloud import storage

project_id = "project-e3b69726-3313-4071-96d"
bucket_name = "alura-fundamentos-nuvem"
file_name = "BR.json"

# Resolve client -> bucket -> blob, then pull the object down as text.
client_gcs = storage.Client(project=project_id)
bucket = client_gcs.bucket(bucket_name)
blob = bucket.blob(file_name)
file_content_str = blob.download_as_text()

# Header line followed by the raw file content, one per line.
print(f"Conteúdo do arquivo {file_name}", file_content_str, sep="\n")
file_content_str  # bare name: shown as the cell output when run in a notebook

import json

# Parse the downloaded text into Python objects.
dados_feriados = json.loads(file_content_str)
dados_feriados  # bare name: shown as the cell output when run in a notebook
# --- Step 2: query the orders table into a DataFrame ---
from google.cloud import bigquery
# BigQuery client bound to the project named by `project_id` (set earlier in the script).
client_bq = bigquery.Client(project = project_id)
# Selects the order id, status, and the purchase / estimated-delivery /
# actual-delivery date columns for every row of the orders table.
consulta_pedidos = """
SELECT
order_id, order_status, order_purchase_timestamp,
order_estimated_delivery_date, order_delivered_customer_date
FROM `project-e3b69726-3313-4071-96d.olist_dataset_new.orders`
"""
# Submit the query, then materialize its result as a DataFrame.
query_job = client_bq.query(consulta_pedidos)
pedido = query_job.to_dataframe()
# Bare name: displays the DataFrame when this is the last line of a notebook cell.
pedido
# --- Step 3: compute, per order, how many days the delivery was late ---
# Keeps only orders where both dates are present and the actual delivery is
# later than the estimate; results are sorted with the largest delay first.
# NOTE(review): despite the alias "atraso_medio_dias" (average delay), the value
# is the per-order DATE_DIFF in days, not an average.
consulta_atrasos = """
SELECT order_id, order_estimated_delivery_date,
order_delivered_customer_date,
DATE_DIFF(order_delivered_customer_date, order_estimated_delivery_date, DAY) AS atraso_medio_dias
FROM `project-e3b69726-3313-4071-96d.olist_dataset_new.orders`
WHERE
order_delivered_customer_date IS NOT NULL
AND order_estimated_delivery_date IS NOT NULL
AND order_delivered_customer_date > order_estimated_delivery_date
ORDER BY atraso_medio_dias DESC
"""
# Submit the query (uses `client_bq` created earlier) and load the rows into a DataFrame.
results = client_bq.query(consulta_atrasos)
df_atraso = results.to_dataframe()
# Bare name: displays the DataFrame when this is the last line of a notebook cell.
df_atraso
# --- Step 4: write the late-delivery DataFrame to a BigQuery table ---
client_bq  # bare name: shown as the cell output when run in a notebook

# Fully-qualified destination in the form project.dataset.table.
caminho = 'project-e3b69726-3313-4071-96d.olist_ecommerce.pedido_atraso'

# A load job creates the destination *table* if needed, but never the
# *dataset*. When the dataset is missing the job fails with
# "404 Not found: Dataset <project>:<dataset>". Create it up front;
# exists_ok=True turns this into a no-op when the dataset already exists.
dataset_id = caminho.rsplit(".", 1)[0]  # "project.dataset"
client_bq.create_dataset(dataset_id, exists_ok=True)

schema = [
    bigquery.SchemaField("order_id", "STRING"),
    bigquery.SchemaField("order_estimated_delivery_date", "TIMESTAMP"),
    bigquery.SchemaField("order_delivered_customer_date", "TIMESTAMP"),
    # This column is the DATE_DIFF(..., DAY) result, i.e. a whole number of
    # days, so it must be declared as an integer rather than a timestamp.
    bigquery.SchemaField("atraso_medio_dias", "INTEGER"),
]

# NOTE: WRITE_APPEND adds the rows again on every run; switch to
# "WRITE_TRUNCATE" if re-running should replace the table contents instead.
job_confg = bigquery.LoadJobConfig(
    schema=schema,
    write_disposition="WRITE_APPEND",
)

job = client_bq.load_table_from_dataframe(df_atraso, caminho, job_config=job_confg)
job.result()  # block until the load job finishes; raises if the job failed
print(f"Tabela {caminho} carregada com sucesso !!! ")
---------------------------------------------------------------------------
InvalidResponse Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/client.py in load_table_from_file(self, file_obj, destination, rewind, size, num_retries, job_id, job_id_prefix, location, project, job_config, timeout)
2681 else:
-> 2682 response = self._do_multipart_upload(
2683 file_obj, job_resource, size, num_retries, timeout, project=project
8 frames
InvalidResponse: ('Request failed with status code', 404, 'Expected one of', <HTTPStatus.OK: 200>)
During handling of the above exception, another exception occurred:
NotFound Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/google/cloud/bigquery/client.py in load_table_from_file(self, file_obj, destination, rewind, size, num_retries, job_id, job_id_prefix, location, project, job_config, timeout)
2684 )
2685 except resumable_media.InvalidResponse as exc:
-> 2686 raise exceptions.from_http_response(exc.response)
2687
2688 return typing.cast(LoadJob, self.job_from_resource(response.json()))
NotFound: 404 POST https://bigquery.googleapis.com/upload/bigquery/v2/projects/project-e3b69726-3313-4071-96d/jobs?uploadType=multipart: Not found: Dataset project-e3b69726-3313-4071-96d:olist_ecommerce