package mobvista.dmp.datasource.dsp.dc.interest

import java.net.URI
import java.util

import mobvista.dmp.common.CommonSparkJob
import mobvista.prd.datasource.util.GsonUtil
import org.apache.commons.cli.Options
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.SparkSession

import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConversions._

/**
  * Spark job that builds the aggregated "dc" interest tags of DSP request devices.
  *
  * The job reads a segment dictionary from `dictPath`, maps the segment ids found in column 13 of
  * every `input` line to first/second level tags, unions the daily result with the rows of
  * `dwh.dmp_device_tag` stored under partition `yestoday` (source 'dc', business 'dsp_req'),
  * keeps the most recent row per (device_id, device_type, platform, tags) and writes the result
  * to `output` as ORC.
  */
class DmDCInterestTagV2 extends CommonSparkJob with Serializable {

  /**
    * Parses the command line, runs the tag job and writes the result.
    *
    * @param args raw command-line arguments
    * @return -1 when `checkMustOption` rejects the arguments, 0 once the job has finished
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      return -1
    }
    printOptions(commandLine)

    val date = commandLine.getOptionValue("date")
    val yestoday = commandLine.getOptionValue("yestoday")
    val input = commandLine.getOptionValue("input")
    val output = commandLine.getOptionValue("output")
    val dictPath = commandLine.getOptionValue("dictPath")

    // Constant values written into every daily row.
    val tagType = "category"
    val packageName = ""

    val spark = SparkSession.builder()
      .appName("DmDCInterestTag")
      .config("spark.rdd.compress", "true")
      .config("spark.speculation", "true")
      .config("spark.speculation.quantile", "0.8")
      .config("spark.speculation.multiplier", "1.5")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._
    val sc = spark.sparkContext

    try {
      // Remove a previous result so the ORC writer does not fail on an existing path. This happens
      // inside the try block so that the session is still stopped when the delete itself fails.
      // NOTE(review): the file system is always resolved for the s3://mob-emr-test bucket, so
      // `output` presumably has to live in that bucket - confirm.
      FileSystem.get(new URI(s"s3://mob-emr-test"), sc.hadoopConfiguration).delete(new Path(output), true)

      // Broadcast the mapping segmentId -> "firstTag<DATA_SPLIT>secondTag".
      // Assumes every dictionary line has at least 11 columns (indexes 0, 9 and 10 are read).
      val dictData = sc.textFile(dictPath)
        .map(splitFun(_))
        .map(columns => {
          val id = columns(0)
          val firstTag = columns(9)
          val secondTag = columns(10)
          (id, s"$firstTag$DATA_SPLIT$secondTag")
        })
        .collectAsMap()

      val dictBC = sc.broadcast(dictData)

      // Daily DSP request data: keep only the lines whose column 13 looks like JSON and expose the
      // mapped rows as the temporary view t_dc_daily.
      sc.textFile(input)
        .map(splitFun(_))
        .filter(array => array.length >= 14 && StringUtils.isNotEmpty(array(13)) && array(13).contains("{"))
        .flatMap(x => matchBySegment(x, dictBC, packageName, tagType, date))
        .toDF()
        .createOrReplaceTempView("t_dc_daily")

      // Union the daily rows with the rows of the `yestoday` partition and keep, per
      // (device_id, device_type, platform, tags), the row with the latest update_date.
      // An earlier version of this query worked on separate tag_type / first_tag / second_tag
      // columns read from dwh.dm_device_tag instead of the aggregated `tags` column.
      val sql =
        s"""
           |select t.device_id, t.device_type, t.platform, t.package_name, t.tags, t.update_date
           |from (
           |  select t.device_id, t.device_type, t.platform, t.package_name, t.tags, t.update_date,
           |   row_number() over(partition by t.device_id, t.device_type, t.platform, t.tags order by t.update_date desc) as rk
           |  from (
           |    select t.device_id, t.device_type, t.platform, t.package_name, t.tags,t.update_date
           |    from t_dc_daily t
           |    union all
           |    select t.device_id, t.device_type, t.platform, t.package_name, t.tags , t.update_date
           |    from dwh.dmp_device_tag t
           |    where t.dt='${yestoday}' and t.source='dc' and t.business='dsp_req'
           |  ) t
           |) t
           |where rk='1'
         """.stripMargin

      spark.sql(sql)
        .write
        .option("orc.compress", "zlib")
        .orc(output)
    } finally {
      spark.stop()
    }
    0
  }

  /**
    * Builds the single aggregated tag row of one DSP request line.
    *
    * Every element of the JSON array in column 13 contributes the first/second tag that the
    * dictionary holds for its "id"; ids without a dictionary entry contribute two empty tags.
    * Distinct tag pairs are rendered as `tagType#firstTag#secondTag` and joined with ",".
    *
    * @param array       columns of one input line; indexes 0, 1, 2 and 13 are read
    * @param dictBC      broadcast dictionary: segment id -> "firstTag<DATA_SPLIT>secondTag"
    * @param packageName value stored in the package_name field of the result
    * @param tagType     prefix of every rendered tag
    * @param date        value stored in the update_date field of the result
    * @return an array holding exactly one [[DmDeviceTagNew]]
    */
  def matchBySegment(array: Array[String], dictBC: Broadcast[scala.collection.Map[String, String]],
                     packageName: String, tagType: String, date: String): Array[DmDeviceTagNew] = {
    val deviceId = array(0)
    val deviceType = array(1)
    val platform = array(2)
    val segments = array(13)

    // De-duplicates the tag pairs of one device. A java.util.HashSet is kept on purpose: its
    // iteration order determines the order of the tags inside the joined string, and that string
    // is part of the partition key used by the SQL in `run`.
    val tagset = new util.HashSet[String]()
    GsonUtil.String2JsonArray(segments)
      .foreach(element => {
        // JsonObject.get returns null for a missing member; treat that like an explicit JSON null
        // instead of failing the task with a NullPointerException.
        val id = Option(element.getAsJsonObject.get("id"))
          .filterNot(_.isJsonNull)
          .map(_.getAsString)
          .getOrElse("-1")

        // `lift` keeps the lookup safe should splitFun drop trailing empty tokens.
        val (firstTag, secondTag) = dictBC.value.get(id)
          .map(tags => {
            val splits = splitFun(tags)
            (splits.lift(0).getOrElse(""), splits.lift(1).getOrElse(""))
          })
          .getOrElse(("", ""))

        tagset.add(s"${firstTag}#${secondTag}")
      })

    // Iterate through the Java iterator (not a Scala Set view) so that neither the order nor the
    // number of rendered tags changes while mapping.
    val tagRes = tagset.iterator()
      .map(line => {
        // String.split drops trailing empty tokens, hence the one-token case. Any other token
        // count (none, or more than two because a tag itself contains '#') yields two empty tags.
        val (firstTag, secondTag) = line.split("#") match {
          case Array(first) => (first, "")
          case Array(first, second) => (first, second)
          case _ => ("", "")
        }
        s"${tagType}#${firstTag}#${secondTag}"
      })
      .mkString(",")

    Array(DmDeviceTagNew(deviceId, deviceType, platform, packageName, tagRes, date))
  }

  /**
    * Adds the options `date`, `yestoday` and `dictPath` to the options built by the parent job.
    *
    * @return the parent's options extended by the three options above
    */
  override protected def buildOptions(): Options = {
    val options = super.buildOptions()
    options.addOption("date", true, "[must] date")
    options.addOption("yestoday", true, "[must] yestoday")
    options.addOption("dictPath", true, "[must] interest dict")
    options
  }
}

object DmDCInterestTagV2 {
  /**
    * Entry point: runs the job with the given command-line arguments.
    *
    * `run` reports rejected arguments through its return value; that value is turned into the
    * process exit status so that a scheduler does not record such a run as successful.
    *
    * @param args raw command-line arguments, passed on unchanged to `run`
    */
  def main(args: Array[String]): Unit = {
    val exitCode = new DmDCInterestTagV2().run(args)
    // Only exit explicitly on failure; a successful run ends by returning from main.
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}

/**
  * One device tag row with the tag split into separate type / first-level / second-level fields.
  * The field names are the column names Spark derives when rows are converted to a DataFrame.
  */
case class DmDeviceTag(
  device_id: String,
  device_type: String,
  platform: String,
  package_name: String,
  tag_type: String,
  first_tag: String,
  second_tag: String,
  update_date: String
)

/**
  * One device row carrying all of its tags in a single `tags` string. `matchBySegment` fills that
  * string with comma-separated `tagType#firstTag#secondTag` entries.
  * The field names are the column names Spark derives when rows are converted to a DataFrame.
  */
case class DmDeviceTagNew(
  device_id: String,
  device_type: String,
  platform: String,
  package_name: String,
  tags: String,
  update_date: String
)