Estou assistindo às aulas do curso "Apache Airflow: transformação de dados com Spark". Na aula 2, atividade 3 - "Instalando o PySpark", segui todos os passos conforme os vídeos: fiz o "!pip install pyspark" e depois importei a biblioteca com "from pyspark.sql import SparkSession", sem problemas. Quando chego na parte de criação da sessão, recebo um erro. Já procurei na internet e não consegui solucionar o problema.
Segue abaixo o erro. Podem me ajudar? PS: Estou usando o Ubuntu.
---------------------------------------------------------------------------
Py4JError Traceback (most recent call last)
Cell In[15], line 1
----> 1 spark = SparkSession\
2 .builder\
3 .appName("twitter_transformation")\
4 .getOrCreate()
File ~/Documentos/Scripts/twitter/venv/lib/python3.9/site-packages/pyspark/sql/session.py:272, in SparkSession.Builder.getOrCreate(self)
269 sc = SparkContext.getOrCreate(sparkConf)
270 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
271 # by all sessions.
--> 272 session = SparkSession(sc, options=self._options)
273 else:
274 getattr(
275 getattr(session._jvm, "SparkSession$"), "MODULE$"
276 ).applyModifiableSettings(session._jsparkSession, self._options)
File ~/Documentos/Scripts/twitter/venv/lib/python3.9/site-packages/pyspark/sql/session.py:307, in SparkSession.__init__(self, sparkContext, jsparkSession, options)
303 getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").applyModifiableSettings(
304 jsparkSession, options
305 )
306 else:
--> 307 jsparkSession = self._jvm.SparkSession(self._jsc.sc(), options)
308 else:
309 getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").applyModifiableSettings(
310 jsparkSession, options
311 )
File ~/Documentos/Scripts/twitter/venv/lib/python3.9/site-packages/py4j/java_gateway.py:1585, in JavaClass.__call__(self, *args)
1579 command = proto.CONSTRUCTOR_COMMAND_NAME +\
1580 self._command_header +\
1581 args_command +\
1582 proto.END_COMMAND_PART
1584 answer = self._gateway_client.send_command(command)
-> 1585 return_value = get_return_value(
1586 answer, self._gateway_client, None, self._fqn)
1588 for temp_arg in temp_args:
1589 temp_arg._detach()
File ~/Documentos/Scripts/twitter/venv/lib/python3.9/site-packages/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
--> 330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
333 else:
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
336 format(target_id, ".", name))
Py4JError: An error occurred while calling None.org.apache.spark.sql.SparkSession. Trace:
py4j.Py4JException: Constructor org.apache.spark.sql.SparkSession([class org.apache.spark.SparkContext, class java.util.HashMap]) does not exist
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:179)
at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:196)
at py4j.Gateway.invoke(Gateway.java:237)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:829)