package mobvista.dmp.datasource.retargeting

import java.net.URI
import java.text.SimpleDateFormat

import mobvista.dmp.util.DateUtil
import mobvista.prd.datasource.util.GsonUtil
import org.apache.commons.cli.{BasicParser, Options}
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable

/**
  * Spark job that reads user-feature rows per region, remaps legacy tag codes to
  * new tag ids, and writes the result as snappy-compressed ORC under
  * `<output>/cn`, `<output>/vg` and `<output>/hk`.
  *
  * The class is `Serializable` because [[mapFun]] reads the `bMap` field, so the
  * closure passed to `rdd.map` carries this instance to the executors.
  */
class UserFeatureFilterJob extends Serializable {

  /**
    * Builds the command-line options understood by the job.
    *
    * @return options `date`, `days`, `output` and `coalesce`, each taking one argument
    */
  def commandOptions(): Options = {
    val options = new Options()
    options.addOption("date", true, "date")
    options.addOption("days", true, "days")
    options.addOption("output", true, "output")
    options.addOption("coalesce", true, "coalesce")
    options
  }

  /** Broadcast of legacy `tag_code` -> new `tag_id`; assigned in `run` before any row is mapped. */
  var bMap: Broadcast[scala.collection.Map[String, String]] = null
  /** Not read or written anywhere in this class; kept so existing references keep compiling. */
  var packageMap: Broadcast[scala.collection.Map[String, Int]] = null

  /**
    * Parses the arguments, prepares the tag mapping and the `dm_region` temp view,
    * then writes one ORC data set per region.
    *
    * @param args command-line arguments; `date`, `output` and `coalesce` are required,
    *             `days` defaults to 0
    * @throws IllegalArgumentException if a required option is missing or `coalesce` is not positive
    */
  private def run(args: Array[String]): Unit = {
    val commandLine = new BasicParser().parse(commandOptions(), args)
    val date = commandLine.getOptionValue("date")
    val days = commandLine.getOptionValue("days", "0").toInt
    val output = commandLine.getOptionValue("output")
    val coalesce = commandLine.getOptionValue("coalesce")

    // Fail fast, before any cluster resources are requested. Without these checks a
    // missing `output` would silently become the literal path "null/cn".
    require(date != null, "missing required option: date")
    require(output != null, "missing required option: output")
    require(coalesce != null, "missing required option: coalesce")
    val partitions = coalesce.toInt
    require(partitions > 0, s"coalesce must be positive, got $partitions")

    val spark = SparkSession
      .builder()
      .appName(s"UserFeatureFilterJob.${date}")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .enableHiveSupport()
      .getOrCreate()

    try {
      val compactFormat = new SimpleDateFormat("yyyyMMdd")
      val dashedFormat = new SimpleDateFormat("yyyy-MM-dd")

      bMap = spark.sparkContext.broadcast(
        spark.sql(Constant.id_old2new_sql).rdd
          .map(r => (r.getAs[Any]("tag_code").toString, r.getAs[Any]("tag_id").toString))
          .collectAsMap())

      val last_req_day = DateUtil.getDayByString(date, "yyyyMMdd", -days)
      val region_sql = Constant.region_sql.replace("@date", date).replace("@last_req_day", last_req_day)
      // Registered as a temp view so later SQL can refer to it by name.
      spark.sql(region_sql)
        .persist(StorageLevel.MEMORY_AND_DISK_SER)
        .createOrReplaceTempView("dm_region")

      val update_date =
        DateUtil.getDayByString(dashedFormat.format(compactFormat.parse(date)), "yyyy-MM-dd", -days)
      val sql = Constant.user_feature_sql.replace("@date", date).replace("@update_date", update_date)

      // cn gets a tenth of the partitions used for the other regions; clamp to 1
      // because repartition(0) is rejected by Spark when coalesce < 10.
      writeRegion(spark, sql, "cn", output + "/cn", math.max(1, partitions / 10))
      writeRegion(spark, sql, "virginia", output + "/vg", partitions)
      writeRegion(spark, sql, "tokyo", output + "/hk", partitions)
    } finally {
      spark.stop()
    }
  }

  /**
    * Runs the feature query for one region and writes it as snappy ORC.
    *
    * @param spark       active session
    * @param sqlTemplate query text containing the `@region` placeholder
    * @param region      value substituted for `@region`
    * @param path        output directory; removed recursively before writing
    * @param partitions  number of partitions (and therefore output files) to write
    */
  private def writeRegion(spark: SparkSession, sqlTemplate: String, region: String,
                          path: String, partitions: Int): Unit = {
    import spark.implicits._

    val regionDf = spark.sql(sqlTemplate.replace("@region", region)).rdd.map(mapFun).toDF

    // Resolve the file system from the target path itself, so the delete always hits
    // the same location the write below goes to, whatever bucket/scheme `path` uses.
    val target = new Path(path)
    target.getFileSystem(spark.sparkContext.hadoopConfiguration).delete(target, true)

    regionDf.repartition(partitions)
      .write
      .mode(SaveMode.Overwrite)
      .option("orc.compress", "snappy")
      .orc(path)
  }

  /**
    * Converts one query row into a [[DeviceTag]].
    *
    * Interest codes and the keys of the `frequency` JSON object are translated through
    * `bMap`; codes without a non-blank mapping are dropped. Every tag id found in
    * `frequency` is also added to the interest id set. A null `interest` or `frequency`
    * column is treated as empty.
    *
    * @param row row exposing `device_id`, `age`, `gender`, `install_apps`, `interest` and `frequency`
    * @return the device record with comma-joined interest ids and the remapped frequency entries
    */
  private def mapFun(row: Row): DeviceTag = {
    import scala.collection.JavaConverters._

    val tagIds = bMap.value

    val interestIds = new mutable.HashSet[Int]()
    val interests =
      Option(row.getAs[scala.collection.Seq[String]]("interest")).getOrElse(Seq.empty[String])
    interests.foreach { code =>
      tagIds.get(code)
        .filter(id => StringUtils.isNotBlank(id))
        .foreach(id => interestIds.add(id.toInt))
    }

    val frequencies = new mutable.HashSet[struct]()
    Option(row.getAs[Any]("frequency"))
      .flatMap(raw => Option(GsonUtil.String2JsonObject(raw.toString)))
      .foreach { json =>
        json.entrySet().asScala.foreach { entry =>
          val code = entry.getKey
          if (StringUtils.isNotBlank(code)) {
            // Same non-blank guard as for interests: a blank tag id cannot be parsed as Int.
            tagIds.get(code)
              .filter(id => StringUtils.isNotBlank(id))
              .foreach { id =>
                frequencies.add(struct(id, entry.getValue.getAsInt))
                interestIds.add(id.toInt)
              }
          }
        }
      }

    DeviceTag(row.getAs("device_id"), row.getAs("age"), row.getAs("gender"), row.getAs("install_apps"),
      interestIds.mkString(","), mutable.WrappedArray.make(frequencies.toArray))
  }
}

/** Entry point for launching [[UserFeatureFilterJob]], e.g. via spark-submit. */
object UserFeatureFilterJob {

  /**
    * Creates a job instance and runs it with the given command-line arguments.
    *
    * @param args command-line arguments forwarded unchanged to the job
    */
  def main(args: Array[String]): Unit = {
    val job = new UserFeatureFilterJob
    job.run(args)
  }
}