
Ajay Makkar
JSON string to DataFrame: changing a schema that contains ambiguous columns - Spark Scala
How do I convert this JSON to a DataFrame in Scala?
json_string = """{
"module": {
"col1": "a",
"col2": {
"5": 1,
"3": 4,
"4": {
"numeric reasoning": 2,
"verbal": 4
},
"7": {
"landline": 2,
"landLine": 4
}
}
}
}"""
The function I use:
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{ArrayType, StructType}
import spark.implicits._

// build a one-row Dataset[String] and let Spark infer the schema
val jsonDF = spark.read.json(Seq(json_string).toDS())
val df = flattenRecursive(jsonDF)
df.show()
def flattenRecursive(df: DataFrame): DataFrame = {
  val fields = df.schema.fields
  // backtick-quote every name so fields like "numeric reasoning" survive selectExpr
  val fieldNames = fields.map(f => s"`${f.name}`")

  for (field <- fields) {
    val fieldName = field.name
    val quoted = s"`$fieldName`"
    field.dataType match {
      case _: ArrayType =>
        println("flatten array")
        // explode the array into one row per element, then flatten the rest
        val newFieldNames = fieldNames.filter(_ != quoted) ++
          Array(s"explode_outer($quoted) as $quoted")
        return flattenRecursive(df.selectExpr(newFieldNames: _*))
      case structType: StructType =>
        println("flatten struct")
        // promote each child of the struct to a top-level column, then recurse
        val newFieldNames = fieldNames.filter(_ != quoted) ++
          structType.fieldNames.map(child => s"$quoted.`$child` as `${fieldName}_$child`")
        return flattenRecursive(df.selectExpr(newFieldNames: _*))
      case _ =>
        println("other type")
    }
  }
  df
}
The error I get is:
Ambiguous reference to fields StructField(landLine,LongType,true), StructField(landline,LongType,true);
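With the default spark.sql.caseSensitive=false, Spark's analyzer treats landline and landLine as the same name, which is what makes the reference ambiguous. A minimal workaround sketch, assuming it is acceptable to tell the two fields apart purely by letter case, is to enable case-sensitive resolution before flattening:

// assumption: case-sensitive column names are acceptable downstream
spark.conf.set("spark.sql.caseSensitive", "true")

With that setting, the selectExpr calls in flattenRecursive should resolve both fields instead of throwing.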
Required output: rename one of the landline columns (e.g. to landline_1) before the explode/flatten step.
**Note - please provide generic code, because:
- I don't know at which level of nesting I will hit this ambiguity, and
- I don't know the schema while the code is running.**
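If the columns must be renamed instead, here is a minimal generic sketch (the helper names dedupeFields and dedupeDataFrame are mine, not from any Spark API, and arrays of structs are not handled): walk the schema recursively, rebuild any struct whose children collide case-insensitively, and suffix the duplicates with an index before calling flattenRecursive:

import org.apache.spark.sql.{Column, DataFrame}
import org.apache.spark.sql.functions.{col, struct}
import org.apache.spark.sql.types.{DataType, StructType}

// Rebuild a column bottom-up, suffixing any struct field whose lower-cased
// name collides with a sibling's (landline -> landline_0, landLine -> landLine_1).
def dedupeFields(c: Column, dt: DataType): Column = dt match {
  case st: StructType =>
    val collisions = st.fieldNames.groupBy(_.toLowerCase).filter(_._2.length > 1).keySet
    var seen = Map.empty[String, Int]
    val children = st.fields.map { f =>
      val key = f.name.toLowerCase
      val n = seen.getOrElse(key, 0)
      seen += key -> (n + 1)
      val newName = if (collisions(key)) s"${f.name}_$n" else f.name
      dedupeFields(c.getField(f.name), f.dataType).alias(newName)
    }
    struct(children: _*)
  case _ => c // assumption: no duplicates hiding inside ArrayType elements
}

// Apply the rewrite to every top-level column of the DataFrame.
def dedupeDataFrame(df: DataFrame): DataFrame =
  df.select(df.schema.fields.map(f => dedupeFields(col(f.name), f.dataType).alias(f.name)): _*)

// getField("landline") is itself ambiguous while the duplicates exist,
// so resolution must be case sensitive during the rewrite
spark.conf.set("spark.sql.caseSensitive", "true")
val deduped = dedupeDataFrame(jsonDF)
spark.conf.set("spark.sql.caseSensitive", "false")
flattenRecursive(deduped).show()

Only colliding names get a suffix, so the rest of the schema passes through unchanged; after the rewrite, the existing flattenRecursive should run without the ambiguity error.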
Tags: json, dataframe, scala, apache-spark, schema