package mobvista.dmp.datasource.rtdmp

import com.alibaba.fastjson.{JSONArray, JSONObject}
import mobvista.dmp.common.{CommonSparkJob, MobvistaConstant}
import mobvista.dmp.format.TextMultipleOutputFormat
import mobvista.dmp.util.DateUtil
import org.apache.commons.cli.{BasicParser, Options}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.SequenceFile.CompressionType
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.{CompressionCodec, GzipCodec}
import org.apache.spark.sql.SparkSession
import org.apache.spark.storage.StorageLevel

import java.net.URI
import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

/**
 * Spark job that exports RTDmp audience data per device and reports per-audience record counts.
 *
 * @package: mobvista.dmp.datasource.rtdmp
 * @author: wangjf
 * @date: 2020/7/13
 * @time: 11:25 AM
 * @email: jinfeng.wang@mobvista.com
 * @phone: 152-1062-7698
 */
class RTDmpASV2 extends CommonSparkJob with Serializable {

  /**
   * Builds the command-line options accepted by this job.
   *
   * Every option (`input_data`, `output`, `coalesce`, `time`) takes a value and uses its own
   * name as the description.
   *
   * @return the option set used to parse the job arguments
   */
  def commandOptions(): Options = {
    val options = new Options()
    Seq("input_data", "output", "coalesce", "time").foreach { name =>
      options.addOption(name, true, name)
    }
    options
  }

  /**
   * Runs the export.
   *
   * Steps, in order:
   *  1. Queries `ServerUtil.request` twice: once for a wide window ending at the given hour
   *     (used to decide which audience ids are kept or dropped) and once for the given hour only
   *     (the ids considered "updated").
   *  2. Reads the ORC input and writes one JSON record per device under `output/data`, routed to
   *     the `foractivation`, `adx` and/or `normal` sub-paths.
   *  3. Counts input rows per updated audience id, writes the counts to `output/audience` and
   *     sends them to `ServerUtil.update`.
   *
   * @param args command-line arguments; `-input_data`, `-output`, `-coalesce` and `-time` are all
   *             required
   * @return always 0; failures surface as exceptions
   * @throws IllegalArgumentException if a required option is missing or `coalesce` is not an integer
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = new BasicParser().parse(commandOptions(), args)

    // Validate every argument up front: previously a bad `coalesce` was only detected after the
    // output directories had already been deleted, and a missing `time` surfaced as a bare NPE.
    val required = (name: String) =>
      Option(commandLine.getOptionValue(name))
        .getOrElse(throw new IllegalArgumentException(s"Missing required option: -$name"))

    val input_data = required("input_data")
    val output = required("output")
    val numPartitions = required("coalesce").toInt
    val time = required("time").replace(".", " ")

    val spark: SparkSession = MobvistaConstant.createSparkSession(s"RTDmpASV2.$time")
    val sc = spark.sparkContext

    try {
      import scala.collection.JavaConverters._

      val fs = FileSystem.get(new URI("s3://mob-emr-test"), sc.hadoopConfiguration)
      fs.delete(new Path(output), true)

      val pattern = "yyyy-MM-dd HH:mm:ss"
      val hourStart = time + ":00:00"
      val hourEnd = time + ":59:59"
      // Subtracted from every epoch-second value below.
      // NOTE(review): 28800 s = 8 h, presumably a UTC+8 correction - confirm.
      val utcOffsetSeconds = 28800

      // Wide window: select all audience packages between a lower and an upper bound.
      // NOTE(review): the -30 passed to DateUtil.getDay is presumably "30 days back" - confirm.
      val lookbackStart = DateUtil.format(DateUtil.getDay(hourStart, pattern, -30), pattern)
      val lookbackEnd = DateUtil.format(hourEnd, pattern)
      val lookbackStartEpoch = DateUtil.parse(lookbackStart, pattern).getTime / 1000 - utcOffsetSeconds
      val lookbackEndEpoch = DateUtil.parse(lookbackEnd, pattern).getTime / 1000 - utcOffsetSeconds

      val ids = ServerUtil.request(lookbackStart, lookbackEnd, lookbackStartEpoch, lookbackEndEpoch, 0, 0, 4).asScala

      // Ids whose third info field equals 3 are dropped from the export, all others are kept.
      val (excluded, included) = ids.partition(kv => kv._2._3 == 3)
      val trueId = included.keys.toSet
      val falseId = excluded.keys.toSet

      // Narrow window: only the audiences updated during the requested hour.
      val hourStartFormatted = DateUtil.format(hourStart, pattern)
      val hourEndFormatted = DateUtil.format(hourEnd, pattern)
      val hourStartEpoch = DateUtil.parse(hourStart, pattern).getTime / 1000 - utcOffsetSeconds
      val hourEndEpoch = DateUtil.parse(hourEnd, pattern).getTime / 1000 - utcOffsetSeconds

      val update_ids = ServerUtil.request(hourStartFormatted, hourEndFormatted, hourStartEpoch, hourEndEpoch, 0, 0, 2)
        .asScala.keys.toSet

      println(s"trueId -->> $trueId, falseId -->> $falseId, update_ids -->> $update_ids")

      // The input is scanned twice (export + counting), hence the persist.
      val rdd = spark.read.orc(input_data).rdd.persist(StorageLevel.MEMORY_AND_DISK_SER)

      val data_output = output + "/data"
      fs.delete(new Path(data_output), true)

      // Gzip block compression for the export. `conf` is the context's shared Hadoop configuration
      // (not a copy), so these settings remain set on it for the rest of the job.
      val conf = spark.sparkContext.hadoopConfiguration
      conf.set("mapreduce.output.compress", "true")
      conf.set("mapreduce.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec")
      conf.setBoolean("mapreduce.output.fileoutputformat.compress", true)
      conf.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.toString)
      conf.setClass("mapreduce.output.fileoutputformat.compress.codec", classOf[GzipCodec], classOf[CompressionCodec])

      rdd.flatMap(row => {
        val records = new ArrayBuffer[(Text, Text)]()
        val deviceId = row.getAs[String]("devid")
        val audienceData = MobvistaConstant.String2JSONObject(row.getAs[String]("audience_data"))

        // audience id (as string) -> epoch seconds derived from the stored "<value>:59:59" time
        val audienceTimes = new mutable.HashMap[String, Long]()
        val audienceIdSet = new mutable.HashSet[Integer]()
        audienceData.entrySet.asScala.foreach { (entry: java.util.Map.Entry[String, Object]) =>
          audienceIdSet.add(Integer.valueOf(entry.getKey))
          audienceTimes.put(
            entry.getKey,
            DateUtil.parse(entry.getValue.asInstanceOf[String] + ":59:59", pattern).getTime / 1000 - utcOffsetSeconds)
        }

        val audienceIds = (audienceIdSet -- falseId) & trueId
        // Filter in place (same map instance) so the serialized key order is unchanged.
        audienceTimes.retain { (key, _) =>
          val id = Integer.valueOf(key)
          !falseId.contains(id) && trueId.contains(id)
        }

        if (audienceIds.nonEmpty) {
          val json = new JSONObject()
          json.put("devid", deviceId)
          json.put("audience_id_v2", audienceTimes.asJava)
          val payload = json.toJSONString
          val updatedIds = audienceIds & update_ids

          // The record key carries the target sub-path.
          // NOTE(review): the trailing ", " is presumably expected by TextMultipleOutputFormat - confirm.
          if (updatedIds.nonEmpty)
            records += ((new Text(s"$data_output/foractivation, "), new Text(payload)))
          // adx output path
          if ((Constant.adxIdSet & audienceIds & update_ids).nonEmpty)
            records += ((new Text(s"$data_output/adx, "), new Text(payload)))
          // default output path for devices without any updated audience
          if (updatedIds.isEmpty)
            records += ((new Text(s"$data_output/normal, "), new Text(payload)))
        }
        records
      }).repartition(numPartitions)
        .saveAsNewAPIHadoopFile(data_output, classOf[Text], classOf[Text], classOf[TextMultipleOutputFormat], conf)

      // Number of input rows per updated audience id. An iterator (not a Set) is mapped so that
      // distinct keys parsing to the same id are still counted separately.
      val audienceSum = rdd.flatMap(row => {
        val audienceData = MobvistaConstant.String2JSONObject(row.getAs[String]("audience_data"))
        audienceData.keySet().asScala.iterator
          .map(key => Integer.parseInt(key))
          .filter(audienceId => update_ids.contains(audienceId))
          .map(audienceId => (audienceId, 1))
      }).reduceByKey(_ + _)

      val audience_output = output + "/audience"
      fs.delete(new Path(audience_output), true)

      audienceSum.coalesce(1).saveAsTextFile(audience_output)

      val jsonArray = new JSONArray()
      audienceSum.collect().foreach { case (audienceId, count) =>
        val item = new JSONObject()
        item.put("id", audienceId)
        item.put("audience_data_status", 2)
        item.put("audience_count", count)
        jsonArray.add(item)
      }

      val response = ServerUtil.update(jsonArray)
      if (response.getInteger("code") == 200) {
        println("Audience Update OK!")
      } else {
        // Previously a rejected update left no trace in the logs. The exit code is deliberately
        // left unchanged so existing schedulers see the same result as before.
        System.err.println(s"Audience Update FAILED, response: $response")
      }
    } finally {
      // Stopping the session also stops its SparkContext.
      spark.stop()
    }
    0
  }
}

object RTDmpASV2 {

  /**
   * JVM entry point: creates the job and runs it with the raw command-line arguments.
   * The status code returned by `run` is not inspected.
   *
   * @param args command-line arguments forwarded unchanged to the job
   */
  def main(args: Array[String]): Unit = {
    val job = new RTDmpASV2()
    job.run(args)
  }
}