package mobvista.dmp.datasource.mpsdk

import mobvista.dmp.datasource.newtag.{Constant, MatchInterestTagDaily, TagDaily}
import org.apache.commons.lang.StringUtils
import org.apache.spark.sql.{DataFrame, SparkSession}

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

/**
  * @package: mobvista.dmp.datasource.mpsdk
  * @author: wangjf
  * @date: 2019/3/18
  * @time: 6:23 PM
  * @email: jinfeng.wang@mobvista.com
  * @phone: 152-1062-7698
  */
class MpSdkTagDaily extends MatchInterestTagDaily with Serializable {

  /**
    * Runs `Constant.mp_sql` for the given date and expands each result row into one
    * `TagDaily` record per distinct package found in its `package_list` column.
    *
    * `package_list` is expected to be a bracketed, comma-separated string such as
    * `[a,b,c]`. Rows whose list is blank or not wrapped in `[` ... `]` produce no output.
    * Entries are trimmed, and blank entries are dropped, so an empty list (`[]`) no longer
    * yields a record with an empty package name.
    *
    * @param date  value substituted for the `@dt` placeholder in `Constant.mp_sql`
    * @param spark session used to execute the query and to build the resulting DataFrame
    * @return a DataFrame built from the expanded `TagDaily` records
    */
  override def processDailyData(date: String, spark: SparkSession): DataFrame = {

    val sql = Constant.mp_sql.replace("@dt", date)

    val rdd = spark.sql(sql).rdd
      .map { row =>
        // The raw package_list string is carried in the package_name field until it is split below.
        TagDaily(row.getAs("device_id"), row.getAs("device_type"), row.getAs("platform"), row.getAs("package_list"))
      }
      .flatMap { record =>
        val packageList = record.package_name
        val isBracketed =
          StringUtils.isNotBlank(packageList) && packageList.startsWith("[") && packageList.endsWith("]")

        val packageNames: Seq[String] =
          if (isBracketed) {
            packageList
              .substring(1, packageList.length - 1)
              .split(",")
              .map(_.trim)
              // "".split(",") returns Array(""), so "[]" would otherwise emit an empty package name.
              .filter(_.nonEmpty)
              .distinct
              .toSeq
          } else {
            Seq.empty[String]
          }

        packageNames.map { packageName =>
          TagDaily(record.device_id, record.device_type, record.platform, packageName)
        }
      }

    import spark.implicits._
    rdd.toDF
  }
}

/** Command-line entry point for the MP SDK daily tag job. */
object MpSdkTagDaily {

  /**
    * Creates a new [[MpSdkTagDaily]] job and hands the raw command-line arguments to its
    * `run` method. `run` is not defined in this file; it is presumably inherited from
    * `MatchInterestTagDaily`, so the accepted arguments are determined there.
    *
    * @param args command-line arguments, forwarded unchanged
    */
  def main(args: Array[String]): Unit = {
    val job = new MpSdkTagDaily()
    job.run(args)
  }
}