I'm trying to create a Spark DataFrame:
from pyspark.sql import SparkSession

# Start (or reuse) a local Spark session
spark = SparkSession.builder \
    .master('local[*]') \
    .appName('CriarDf') \
    .getOrCreate()

# Two rows of (Nome, Idade); Idade is a string here on purpose
data = [('Zeca', '35'), ('Eva', '29')]
colNames = ['Nome', 'Idade']

df = spark.createDataFrame(data, colNames)
df.show()
But it returns the following error:
Py4JJavaError Traceback (most recent call last)
Cell In[8], line 11
9 colNames = ['Nome', 'Idade']
10 df = spark.createDataFrame(data, colNames)
---> 11 df.show()
File C:\Spark\spark-3.5.0-bin-hadoop3\python\pyspark\sql\dataframe.py:959, in DataFrame.show(self, n, truncate, vertical)
953 raise PySparkTypeError(
954 error_class="NOT_BOOL",
955 message_parameters={"arg_name": "vertical", "arg_type": type(vertical).__name__},
956 )
958 if isinstance(truncate, bool) and truncate:
--> 959 print(self._jdf.showString(n, 20, vertical))
960 else:
961 try:
File C:\Spark\spark-3.5.0-bin-hadoop3\python\lib\py4j-0.10.9.7-src.zip\py4j\java_gateway.py:1322, in JavaMember.__call__(self, *args)
1316 command = proto.CALL_COMMAND_NAME +\
1317 self.command_header +\
1318 args_command +\
1319 proto.END_COMMAND_PART
1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
...
at java.base/java.io.DataInputStream.readFully(DataInputStream.java:210)
at java.base/java.io.DataInputStream.readInt(DataInputStream.java:385)
at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:774)
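In case it helps narrow this down: my assumption, based on the PythonRunner lines at the end of the traceback, is that the JVM side cannot read back from the Python worker process. A spark.range() DataFrame is built entirely on the JVM, while createDataFrame with local Python data does go through a Python worker at execution time, so a comparison along these lines is the sketch I would try (the version prints are only for context):

import sys
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .master('local[*]') \
    .appName('CriarDf') \
    .getOrCreate()

# Driver Python and Spark versions, for context
print(sys.version)
print(spark.version)

# JVM-only DataFrame: no Python worker is involved
spark.range(3).show()

# Local Python data: execution needs a working Python worker
spark.createDataFrame([('Zeca', '35'), ('Eva', '29')], ['Nome', 'Idade']).show()

If the range() call works but the last line still fails, my guess is the problem is in how the Python worker is launched on this machine rather than in the DataFrame code itself.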