Estou com o erro abaixo onde não consigo simular as atividades logo no inicio da chamada do dataframe.
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('alura').getOrCreate()
dataframe_site = spark.read.json('site.json')
Py4JJavaError Traceback (most recent call last)
C:\Spark\python\pyspark\sql\utils.py in deco(*a, **kw)
97 try:
---> 98 return f(*a, **kw)
99 except py4j.protocol.Py4JJavaError as e:
c:\users\g1745 iron\appdata\local\programs\python\python37\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
Py4JJavaError: An error occurred while calling o71.json.
: org.apache.spark.sql.AnalysisException: Path does not exist: file:/C:/Users/G1745 IRON/OneDrive - Fundação Instituto de Administração/CURSOS/ALURA/SPARK/site.json;
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$1(DataSource.scala:759)
at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
at org.apache.spark.sql.execution.datasources.DataSource$.checkAndGlobPathIfNecessary(DataSource.scala:746)
at org.apache.spark.sql.execution.datasources.v2.FileTable.fileIndex$lzycompute(FileTable.scala:56)
at org.apache.spark.sql.execution.datasources.v2.FileTable.fileIndex(FileTable.scala:44)
at org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$4(FileTable.scala:69)
at scala.Option.orElse(Option.scala:447)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:69)
at org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:63)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:82)
at org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:80)
at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$.create(DataSourceV2Relation.scala:141)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:225)
at scala.Option.map(Option.scala:230)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:206)
at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:411)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
During handling of the above exception, another exception occurred:
AnalysisException Traceback (most recent call last)
<ipython-input-10-c980055f5db3> in <module>
----> 1 dataframe_site = spark.read.json('site.json')
C:\Spark\python\pyspark\sql\readwriter.py in json(self, path, schema, primitivesAsString, prefersDecimal, allowComments, allowUnquotedFieldNames, allowSingleQuotes, allowNumericLeadingZero, allowBackslashEscapingAnyCharacter, mode, columnNameOfCorruptRecord, dateFormat, timestampFormat, multiLine, allowUnquotedControlChars, lineSep, samplingRatio, dropFieldIfAllNull, encoding, locale, recursiveFileLookup)
275 path = [path]
276 if type(path) == list:
--> 277 return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))
278 elif isinstance(path, RDD):
279 def func(iterator):