Olá, boa tarde. Como acessar os valores dos campos "Product_name", "Product_ID" e "Product_category" do tipo struct (arrays aninhados) sem utilizar o comando "explode" como opção?
root
|-- Product: struct (nullable = true)
| |-- Product_name: string (nullable = true)
| |-- Product_ID: long (nullable = true)
| |-- Product_category: string (nullable = true)
|-- Rating: float (nullable = true)
|-- Price: integer (nullable = true)
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, LongType, StringType, FloatType, ArrayType
# Reuse the active session when one already exists (pyspark shell, notebook)
# and create one otherwise, so this snippet also runs as a standalone script
# instead of failing with a NameError on `spark`.
spark = SparkSession.builder.getOrCreate()

# Each row is (Product, Rating, Price). Product is itself a tuple whose three
# positions map onto the nested struct fields declared in the schema below.
input_data = [
    (("Refrigerator", 112345, "Eletronico"), 4.0, 12499),
    (("LED TV", 114567, "Eletronico"), 4.2, 49999),
    (("Washing Machine", 113465, "Eletronico"), 3.9, 69999),
    (("T-shirt", 124378, "Vestuario"), 4.1, 1999),
    (("Jeans", 126754, "Vestuario"), 3.7, 3999),
    (("Running Shoes", 134565, "Vestuario"), 4.7, 1499),
    (("Face Mask", 145234, "Outros"), 4.6, 999),
]

# Explicit schema: a nested `Product` struct plus two scalar columns.
schm = StructType([
    StructField('Product', StructType([
        StructField('Product_name', StringType(), True),
        StructField('Product_ID', LongType(), True),
        StructField('Product_category', StringType(), True),
    ])),
    StructField('Rating', FloatType(), True),
    StructField('Price', IntegerType(), True),
])

df = spark.createDataFrame(data=input_data, schema=schm)
df.printSchema()
df.show(truncate=False)

# NOTE: `Product` is a struct column (not an array), so its fields can be
# addressed with dot notation in `select` -- "Product.Product_name",
# "Product.Product_ID", "Product.Product_category" -- or all at once with
# "Product.*"; `explode` only applies to array/map columns.
Obrigada