Já tentei todas as tratativas que foram abordadas no fórum e não deu certo; sempre retorna erro:
Alguém sabe? Já tentei de tudo, até rodar no próprio PC, sem o ambiente virtual...
Segue o código da aula:
from datetime import datetime
from os.path import join
from airflow.models import DAG
from airflow.plugins_manager import AirflowPlugin
from airflow.operators.alura import TwitterOperator
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
# from spark.transformation import twitter_transform
# DAG "twitter_dag": declares two tasks inside the DAG context manager --
# a TwitterOperator for the query "AluraOnline" and a SparkSubmitOperator
# named "twitter_transformation".
#
# NOTE(review): start_date=datetime.now() is re-evaluated every time the file
# is parsed; Airflow's documentation advises a fixed date instead -- confirm
# the intended start date before changing it.
with DAG(dag_id="twitter_dag", start_date=datetime.now()) as dag:
    # file_path is built from Jinja-templated segments ({{ ds }} and
    # {{ ds_nodash }}); presumably TwitterOperator declares file_path as a
    # templated field -- verify against the operator definition.
    # NOTE(review): this path has no "bronze" segment, while the Spark
    # "--src" argument below reads from ".../datalake/bronze/..." -- confirm
    # which layout is intended.
    twitter_operator = TwitterOperator(
        task_id="twitter_aluraonline",
        query="AluraOnline",
        file_path=join(
            "/home/william/Documentos/ESTUDOS/ALURA-ENGENHARIA_DADOS/datapipeline/datalake",
            "twitter_aluraonline",
            "extract_date={{ ds }}",
            "AluraOnline_{{ ds_nodash }}.json",
        ),
    )

    twitter_transform = SparkSubmitOperator(
        task_id="transform_twitter_aluraonline",
        # Fixed: the keyword was misspelled as "aplication", so the job path
        # was never handed to the operator's `application` parameter.
        # NOTE(review): this value looks like a directory rather than a
        # script file (e.g. a .py inside it) -- confirm the actual job path.
        application="/home/william/Documentos/ESTUDOS/ALURA-ENGENHARIA_DADOS/datapipeline/spark",
        name="twitter_transformation",
        application_args=[
            "--src",
            # NOTE(review): the extract date is hard-coded here while
            # "--process-date" uses the templated {{ ds }} -- confirm whether
            # this should be templated as well.
            "/home/william/Documentos/ESTUDOS/ALURA-ENGENHARIA_DADOS/datapipeline/datalake/bronze/twitter_aluraonline/extract_date=2022-05-07",
            "--dest",
            "/home/william/Documentos/ESTUDOS/ALURA-ENGENHARIA_DADOS/datapipeline/datalake/silver/twitter_aluraonline",
            "--process-date",
            "{{ ds }}",
        ],
    )

    # NOTE(review): no dependency is declared between the two tasks, so the
    # scheduler is free to run them in any order -- confirm whether
    # `twitter_operator >> twitter_transform` is intended.