-|select idfa,gaid,platform,country,ip,gender,birthday,maker,model,osversion,concat_ws('#',collect_set(packagename)) packagename,exitid,max(`time`) datetime,segment,dealerid,exchanges,concat_ws('#',collect_set(region)) region
+|select idfa,gaid,platform,country,ip,gender,birthday,maker,model,osversion,ua,concat_ws('#',collect_set(packagename)) packagename,exitid,max(`time`) datetime,segment,dealerid,exchanges,concat_ws('#',collect_set(region)) region
 |from dsp_org_etl_hours
-|group by idfa,gaid,platform,country,ip,gender,birthday,maker,model,osversion,exitid,segment,dealerid,exchanges
+|group by idfa,gaid,platform,country,ip,gender,birthday,maker,model,osversion,ua,exitid,segment,dealerid,exchanges
 """.stripMargin
 spark.sql(etl_sql).repartition(coalesce.toInt)
   .write
...
...
@@ -135,6 +134,7 @@ class DspOrgLogEtlHoursDemo extends CommonSparkJob with Serializable {
 val maker = row.getAs[String]("make")
 val model = row.getAs[String]("model")
 val osVersion = row.getAs[String]("osversion")
+val ua = row.getAs[String]("ua")
 val country = row.getAs[String]("country")
 val birthday = row.getAs[String]("birthday")
 val gender = row.getAs[String]("gender")
...
...
@@ -203,9 +203,8 @@ class DspOrgLogEtlHoursDemo extends CommonSparkJob with Serializable {