When I try to read the directory as Parquet, I get the error below:
empresas_parquet = spark.read.parquet(
path='C:\dev\projeto-spark\empresas\parquet'
)
---------------------------------------------------------------------------
AnalysisException Traceback (most recent call last)
<ipython-input-96-df496573f26c> in <module>
1 empresas_parquet = spark.read.parquet(
----> 2 path='C:\dev\projeto-spark\empresas\parquet'
3 )
C:\dev\spark-3.3.0-bin-hadoop3\python\pyspark\sql\readwriter.py in parquet(self, *paths, **options)
362 )
363
--> 364 return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths)))
365
366 def text(
C:\dev\spark-3.3.0-bin-hadoop3\python\lib\py4j-0.10.9.5-src.zip\py4j\java_gateway.py in __call__(self, *args)
1320 answer = self.gateway_client.send_command(command)
1321 return_value = get_return_value(
-> 1322 answer, self.gateway_client, self.target_id, self.name)
1323
1324 for temp_arg in temp_args:
C:\dev\spark-3.3.0-bin-hadoop3\python\pyspark\sql\utils.py in deco(*a, **kw)
194 # Hide where the exception came from that shows a non-Pythonic
195 # JVM exception message.
--> 196 raise converted from None
197 else:
198 raise
AnalysisException: Unable to infer schema for Parquet. It must be specified manually.
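For context, this is the alternative call I'm considering. From the traceback, spark.read.parquet has the signature parquet(self, *paths, **options), so my path= keyword seems to land in **options and the positional paths tuple stays empty, which would explain the "Unable to infer schema" message. This is just a sketch of what I plan to try (same directory from my project), not a confirmed fix:

# Passing the directory positionally, as a raw string, so it actually reaches *paths
empresas_parquet = spark.read.parquet(r'C:\dev\projeto-spark\empresas\parquet')

# Or keeping the keyword argument but using the generic loader, which accepts path=
empresas_parquet = spark.read.load(
    path=r'C:\dev\projeto-spark\empresas\parquet',
    format='parquet'
)

Is this understanding of the error correct, or is there something else wrong with the directory itself?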