package mobvista.dmp.datasource.dsp.dc.interest

import java.net.URI
import java.util
import java.util.regex.Pattern

import com.google.gson.JsonElement
import mobvista.dmp.common.CommonSparkJob
import mobvista.prd.datasource.util.GsonUtil
import org.apache.commons.cli.Options
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

import scala.collection.JavaConversions._

/**
  * Spark job that folds one day of DSP request segments into the accumulated
  * per-device interest data and writes the merged result as ORC.
  *
  * Every daily segment is annotated with the tags found for its `id` in the
  * dictionary read from `dictPath` and stamped with the run `date`. For devices
  * that also exist in the previous output (`oldInterestPath`), daily segments
  * replace accumulated segments that carry the same `id`.
  */
class DmDCInterestTag extends CommonSparkJob with Serializable {

  /**
    * Parses the command line, runs the merge and writes the ORC output.
    *
    * @param args command-line arguments; `date`, `input`, `output`, `dictPath`,
    *             `oldInterestPath` and `parallelism` are read from them
    * @return 0 when the job completes, -1 when `checkMustOption` rejects the arguments
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      return -1
    }
    printOptions(commandLine)

    val date = commandLine.getOptionValue("date")
    val input = commandLine.getOptionValue("input")
    val output = commandLine.getOptionValue("output")
    val dictPath = commandLine.getOptionValue("dictPath")
    val oldOutput = commandLine.getOptionValue("oldInterestPath")
    val parallelism = commandLine.getOptionValue("parallelism", "200").toInt
    val pattern = Pattern.compile(DATA_SPLIT)

    val spark = SparkSession.builder()
      .appName("DmDCInterestTag")
      .config("spark.rdd.compress", "true")
      .config("spark.default.parallelism", s"$parallelism")
      //      .config("spark.speculation", "true")
      //      .config("spark.speculation.quantile", "0.8")
      //      .config("spark.speculation.multiplier", "1")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()
    import spark.implicits._
    val sc = spark.sparkContext

    try {
      // Remove any previous output. The file system is resolved from the output
      // path itself so the cleanup targets the same location `save` writes to,
      // and it runs inside `try` so the session is stopped if the delete fails.
      val outputPath = new Path(output)
      outputPath.getFileSystem(sc.hadoopConfiguration).delete(outputPath, true)

      // Broadcast the segment id -> "firstTag<DATA_SPLIT>secondTag" dictionary.
      // Lines with fewer than 11 columns are skipped instead of failing the job.
      val dictData = sc.textFile(dictPath)
        .map(splitFun(_))
        .filter(_.length > 10)
        .map(columns => (columns(0), s"${columns(9)}$DATA_SPLIT${columns(10)}"))
        .collectAsMap()

      val dictBC = sc.broadcast(dictData)

      // Daily DSP request data, keyed by device id / device type / platform.
      // Only rows whose 14th column is non-empty and contains "{" are kept.
      val dailyTuple = sc.textFile(input)
        .map(splitFun(_))
        .filter(columns => columns.length >= 14 && StringUtils.isNotEmpty(columns(13)) && columns(13).contains("{"))
        .map(columns => {
          val deviceId = columns(0)
          val deviceType = columns(1)
          val platform = columns(2)
          (s"$deviceId$DATA_SPLIT$deviceType$DATA_SPLIT$platform", columns(13))
        })

      // Accumulated interest data from the previous run; empty when no
      // `oldInterestPath` was supplied.
      val totalTuple =
        if (StringUtils.isNotEmpty(oldOutput)) {
          spark.read.orc(oldOutput)
            .map(row => {
              val deviceId = String.valueOf(row.get(0))
              val deviceType = String.valueOf(row.get(1))
              val platform = String.valueOf(row.get(2))
              val segments = String.valueOf(row.get(3))
              (s"$deviceId$DATA_SPLIT$deviceType$DATA_SPLIT$platform", segments)
            })
            .rdd
        } else {
          sc.emptyRDD[(String, String)]
        }

      dailyTuple.fullOuterJoin(totalTuple)
        .map { case (key, segments) =>
          val result = segments match {
            case (Some(daily), None) => // only daily data
              val jsonArray = GsonUtil.String2JsonArray(daily)
              // tagSegment mutates the objects held by jsonArray in place
              jsonArray.foreach(element => tagSegment(element, dictBC.value, pattern, date))
              jsonArray.toString

            case (None, Some(total)) => // only accumulated data
              total

            case (Some(daily), Some(total)) => // both: daily segments override by id
              val merged = new util.HashMap[String, JsonElement]()
              GsonUtil.String2JsonArray(total).foreach(element => {
                val id = element.getAsJsonObject.get("id").getAsString
                merged.put(id, element)
              })
              GsonUtil.String2JsonArray(daily).foreach(element => {
                val (id, tagged) = tagSegment(element, dictBC.value, pattern, date)
                merged.put(id, tagged)
              })
              GsonUtil.toJson(merged.values())

            case (None, None) => // not produced by a full outer join
              ""
          }

          val keySplits = splitFun(key)
          DmDCInterestTagVO(keySplits(0), keySplits(1), keySplits(2), result) // device_id, device_type, platform, tags
        }
        .filter(_.device_id.nonEmpty)
        .toDF()
        .write
        .option("orc.compress", "zlib")
        .format("ORC")
        .save(output)

    } finally {
      spark.stop()
    }
    0
  }

  /**
    * Annotates one daily segment in place.
    *
    * When the segment `id` is present in `dict`, the dictionary value is split
    * with `pattern` and each part is stored under the property "1", "2", ...
    * The "date" property is always set. Earlier, the daily-only path set it only
    * for ids found in the dictionary while the merge path set it always; both
    * paths now share this behaviour.
    *
    * @param element segment element; must be a JSON object with an "id" member
    * @param dict    segment id -> delimiter-joined tags
    * @param pattern delimiter pattern used to split the dictionary value
    * @param date    run date written to the "date" property
    * @return the segment id together with the annotated object
    */
  private def tagSegment(element: JsonElement,
                         dict: scala.collection.Map[String, String],
                         pattern: Pattern,
                         date: String): (String, JsonElement) = {
    val obj = element.getAsJsonObject
    val id = obj.get("id").getAsString
    dict.get(id).foreach { tags =>
      pattern.split(tags).zipWithIndex.foreach { case (tag, index) =>
        obj.addProperty(String.valueOf(index + 1), tag)
      }
    }
    obj.addProperty("date", date)
    (id, obj)
  }

  /**
    * Extends the options built by the parent job with the ones specific to
    * this job: `date`, `oldInterestPath` and `dictPath`.
    *
    * @return the parent's options plus the three options added here
    */
  override protected def buildOptions(): Options = {
    val opts = super.buildOptions()
    opts.addOption("date", true, "[must] date")
    opts.addOption("oldInterestPath", true, "old interest path")
    opts.addOption("dictPath", true, "[must] interest dict")
    opts
  }
}

/**
  * Command-line entry point: creates a [[DmDCInterestTag]] job and runs it with
  * the given arguments. The status code returned by `run` is discarded.
  */
object DmDCInterestTag {
  def main(args: Array[String]): Unit = {
    val job = new DmDCInterestTag()
    job.run(args)
  }
}

/**
  * One output row of the interest-tag job.
  *
  * @param device_id   device identifier
  * @param device_type device type
  * @param platform    platform
  * @param tags        serialized segment data for the device
  */
case class DmDCInterestTagVO(
  device_id: String,
  device_type: String,
  platform: String,
  tags: String
)
