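Goal: extract the Auditors from Source, together with organizationId and sourceId, into a dataframe.
First, read the XML, explode env:Body.env:ContentItem, and select organizationId, sourceId and Source: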
import sqlContext.implicits._
import org.apache.spark.sql.functions._
// Read the XML under the S3 prefix with the spark-xml data source, using
// env:ContentEnvelope as the row tag.
val dfContentEnvelope = sqlContext.read
  .format("com.databricks.spark.xml")
  .option("rowTag", "env:ContentEnvelope")
  .load("s3://trfsmallfffile/XML")

// Explode the env:ContentItem array into a temporary struct column, then
// expand that struct's fields into top-level columns.
val dfContentItem = dfContentEnvelope
  .select(explode(dfContentEnvelope("env:Body.env:ContentItem")).as("column1"))
  .select("column1.*")

// Keep the two identifying attributes of sr:Source next to the full Source
// struct, so the struct can be drilled into later without losing the keys.
val ParentDF = {
  val sourcePath = "env:Data.sr:Source"
  dfContentItem.select(
    col(s"$sourcePath._organizationId").as("organizationId"),
    col(s"$sourcePath._sourceId").as("sourceId"),
    col(sourcePath).as("Source")
  )
}
+--------------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|organizationId|sourceId|Source |
+--------------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|4295906830 |344 |[4295906830,344,[WrappedArray([3541,3024068,UNQ,3010546,true,false,false], [9574,3030421,UWE,3010547,true,false,false])],20171030T00:00:00+00:00,false,false,1.0,20171111T17:00:00+00:00,300,false,10K,3011835,20171030T00:00:00+00:00,SS,1000716240,1]|
+--------------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Next, explode sr:Auditor so that each auditor gets its own row:
// Carry organizationId and sourceId through unchanged and explode the
// Source.sr:Auditors.sr:Auditor array into a column named Auditors.
val childDF = ParentDF.select(
  col("organizationId"),
  col("sourceId"),
  explode(col("Source.sr:Auditors.sr:Auditor")).as("Auditors")
)
+--------------+--------+-------------------------------------------+
|organizationId|sourceId|Auditors |
+--------------+--------+-------------------------------------------+
|4295906830 |344 |[3541,3024068,UNQ,3010546,true,false,false]|
|4295906830 |344 |[9574,3030421,UWE,3010547,true,false,false]|
+--------------+--------+-------------------------------------------+
,