# Untitled

from pyspark.sql.types import *
import pyspark.sql.functions as F

# Schema for the sample rows: a required name, an array of business strings
# (nullable, since no `nullable` flag is given), and a required integer total.
schema = StructType([
    StructField('name', StringType(), nullable=False),
    StructField('business', ArrayType(StringType())),
    StructField('total', IntegerType(), nullable=False),
])

# Sample rows in schema order: (name, business, total).
# The last row has an empty `business` array on purpose, to exercise the
# empty-array path of the aggregation and of explode_outer below.
test_list = [
    ['a', ['Hello', 'world'], 200],
    ['a', ['I', 'am', 'fine'], 300],
    ['b', [], 500],
]

# NOTE: `spark` is not defined in this script; it is expected to be an
# existing SparkSession provided by the environment (e.g. a notebook/shell).
df = spark.createDataFrame(test_list, schema=schema)

# Per (name, business) group, sum `total` and, when the array holds more than
# one element, divide that sum by the number of elements so every element gets
# an equal share. Arrays of size <= 1 (including empty ones) keep the plain
# sum, which also means the division never sees a zero divisor.
business_count = F.size('business')
group_total = F.sum('total')
df1 = df.groupBy('name', 'business').agg(
    F.when(business_count > 1, group_total / business_count)
    .otherwise(group_total)
    .alias('total')
)

# Turn each array element into its own row. explode_outer (rather than
# explode) is used so groups whose array is empty or null are kept, with a
# null `business` value, instead of being dropped.
df1 = df1.withColumn('business', F.explode_outer('business'))