package mobvista.dmp.datasource.joypac

import java.net.URI

import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
import mobvista.dmp.clickhouse.realtime.Constant
import mobvista.dmp.common.CommonSparkJob
import mobvista.dmp.util.{DateUtil, MRUtils, PropertyUtil}
import org.apache.commons.cli.Options
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}

/**
  * @package: mobvista.dmp.datasource.joypac
  * @author: wangjf
  * @date: 2019-12-18
  * @time: 14:10:50
  * @email: jinfeng.wang@mobvista.com
  * @phone: 152-1062-7698
  */

/**
  * Daily export job for Joypac devices.
  *
  * Reads the day's rows from `dwh.joypac_result` through Spark SQL, groups the
  * per-report app lists by device, left-joins them with the insight attributes
  * read from ClickHouse and writes one JSON document per device to `output`
  * as gzip-compressed text.
  */
class JoypacResultDaily extends CommonSparkJob {

  /** Declares the command-line options accepted by the job (all take a value). */
  override protected def buildOptions(): Options = {
    val opts = new Options
    Seq(
      "date" -> "[must] date",
      "output" -> "[must] output",
      "coalesce" -> "[must] coalesce",
      "cluster" -> "[must] cluster",
      "dict" -> "[must] dict"
    ).foreach { case (name, description) => opts.addOption(name, true, description) }

    opts
  }

  /**
    * Parses the arguments and runs the export.
    *
    * @param args command-line arguments matching the options of `buildOptions`
    * @return -1 when the mandatory-option check fails, 0 once the output has been written
    * @throws NumberFormatException when the `coalesce` option is not an integer
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)
      exportDailyResult(
        date = commandLine.getOptionValue("date"),
        output = commandLine.getOptionValue("output"),
        // Parsed once, before any Spark work starts, so a malformed value fails fast.
        partitions = commandLine.getOptionValue("coalesce").toInt,
        cluster = commandLine.getOptionValue("cluster"),
        dict = commandLine.getOptionValue("dict"))
      0
    }
  }

  /**
    * Builds the per-device JSON documents and saves them under `output`.
    *
    * @param date       partition date in `yyyyMMdd` form
    * @param output     destination directory; any existing content is removed first
    * @param partitions partition count for the ClickHouse read and the final coalesce
    * @param cluster    ClickHouse cluster name handed to the connector
    * @param dict       text file of `key;package_name` lines used to resolve package names
    */
  private def exportDailyResult(date: String, output: String, partitions: Int, cluster: String, dict: String): Unit = {
    val spark = SparkSession.builder()
      .appName(s"JoypacResultDaily.${date}")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.clickhouse.driver", "ru.yandex.clickhouse.ClickHouseDriver")
      .config("spark.clickhouse.url", PropertyUtil.getProperty("config.properties", "spark.clickhouse.url"))
      .config("spark.clickhouse.connection.per.executor.max", "5")
      .config("spark.clickhouse.metrics.enable", "true")
      .config("spark.clickhouse.socket.timeout.ms", "300000")
      .config("spark.clickhouse.cluster.auto-discovery", "true")
      .enableHiveSupport()
      .getOrCreate()
    val sc = spark.sparkContext

    try {
      // Clear the previous output. The filesystem is resolved from the output path
      // itself so the delete targets the same location saveAsTextFile writes to.
      // This runs inside the try so that a failure still stops the session.
      val outputPath = new Path(output)
      outputPath.getFileSystem(sc.hadoopConfiguration).delete(outputPath, true)

      // Dictionary lines are "key;package_name"; keys are upper-cased for lookup and
      // lines that do not split into exactly two fields are ignored.
      val packageMap = sc.broadcast(
        sc.textFile(dict)
          .map(_.split(";"))
          .filter(_.length == 2)
          .map(kv => (kv(0).toUpperCase(), kv(1)))
          .collectAsMap())

      val update_date = DateUtil.format(DateUtil.parse(date, "yyyyMMdd"), "yyyy-MM-dd")

      val dailySql = JoypacResultDaily.sql
        .replace("@date", date)
        .replace("@update_date", update_date)
        .replace("@part", "all")

      val dailyDF = spark.sql(dailySql)
        .rdd.map(r => {
        import scala.collection.JavaConverters._

        val dictionary = packageMap.value
        val reporter = r.getAs[Any]("package_name").toString

        val allApps = new JSONObject()
        // Fall back to the raw package name when the dictionary has no entry for it.
        allApps.put("reporter_package_name", dictionary.getOrElse(reporter.toUpperCase, reporter))
        allApps.put("reporter_app_version", r.getAs[Any]("app_version"))

        val install_apps = new JSONArray()
        val appsInfoMap = JSON.parse(r.getAs[Any]("apps_info").toString)
          .asInstanceOf[java.util.Map[String, Int]].asScala

        appsInfoMap.foreach { case (schemaUrl, status) =>
          val lookupKey = schemaUrl.toUpperCase
          // Try the key as-is first, then with a "://" suffix; empty string when neither is known.
          val packageName = dictionary.get(lookupKey)
            .orElse(dictionary.get(lookupKey + "://"))
            .getOrElse("")
          val json = new JSONObject()
          json.put("package_name", packageName)
          json.put("status", status)
          json.put("schema_url", schemaUrl)
          install_apps.add(json)
        }
        allApps.put("install_apps", install_apps)

        (MRUtils.JOINER.join(r.getAs("device_id"), r.getAs("platform")), allApps)
      }).combineByKey(
        (v: JSONObject) => Iterable(v),
        (c: Iterable[JSONObject], v: JSONObject) => c ++ Seq(v),
        (c1: Iterable[JSONObject], c2: Iterable[JSONObject]) => c1 ++ c2
      ).map { case (deviceKey, reports) =>
        // Collapse every report of one (device_id, platform) key into a single JSON array.
        val jsonArray = new JSONArray()
        reports.foreach(report => jsonArray.add(report))
        val ks = MRUtils.SPLITTER.split(deviceKey)
        Row(ks(0), ks(1), jsonArray.toJSONString)
      }
      spark.createDataFrame(dailyDF, JoypacResultDaily.daily_schema)
        .dropDuplicates("device_id")
        .createOrReplaceTempView("daily_joypac")

      import io.clickhouse.spark.connector._

      val query = JoypacResultDaily.ck_sql.replace("@date", update_date)

      val ckDF = sc.clickhouseTable(query, cluster)
        .withCustomPartitioning(Constant.buildPart(partitions))

      val ckDFF = ckDF.map(r => {
        Row(r.getAs("device_id"), r.getAs("country"), r.getAs("age"), r.getAs("gender"),
          r.getAs("install_apps"), r.getAs("interest"))
      })
      spark.createDataFrame(ckDFF, JoypacResultDaily.ck_schema)
        .dropDuplicates("device_id")
        .createOrReplaceTempView("daily_joypac_insight")

      spark.sql(JoypacResultDaily.join_sql).rdd.map(r => {
        val keyJson = new JSONObject()
        keyJson.put("idfaGaid", r.getAs[Any]("device_id").toString)

        val valJson = new JSONObject()
        // A null or blank platform is reported as os = 0.
        val os = Option(r.getAs[Any]("platform"))
          .map(_.toString)
          .filter(s => StringUtils.isNotBlank(s))
          .fold(0)(s => Integer.parseInt(s))
        valJson.put("os", os)
        valJson.put("age", Integer.parseInt(r.getAs[Any]("age").toString))
        valJson.put("gender", Integer.parseInt(r.getAs[Any]("gender").toString))
        // Single quotes in the interest string are swapped for double quotes before JSON parsing.
        val interest = r.getAs[Any]("interest").toString.replace("'", "\"")
        valJson.put("interest", JSON.parseArray(interest))
        valJson.put("install_apps", JSON.parseArray(r.getAs[Any]("install_apps").toString))
        valJson.put("allApps", JSON.parseArray(r.getAs[Any]("all_apps").toString))

        val json = new JSONObject()
        json.put("key", keyJson)
        json.put("val", valJson)
        json.toJSONString
      }).coalesce(partitions).saveAsTextFile(output, classOf[GzipCodec])

    } finally {
      // Stopping the session also stops its underlying SparkContext.
      spark.stop()
    }
  }

}

/** Schemas, SQL templates and the command-line entry point of the Joypac daily result job. */
object JoypacResultDaily {

  /** Schema of the rows built from the ClickHouse insight query. */
  val ck_schema: StructType = new StructType()
    .add("device_id", StringType)
    .add("country", StringType)
    .add("age", IntegerType)
    .add("gender", IntegerType)
    .add("install_apps", StringType)
    .add("interest", StringType)

  /** Schema of the per-device rows built from the daily result query. */
  val daily_schema: StructType = new StructType()
    .add("device_id", StringType)
    .add("platform", StringType)
    .add("all_apps", StringType)

  /** Daily result query template; placeholders are `@date`, `@update_date` and `@part`. */
  val sql: String =
    """
      |SELECT * FROM dwh.joypac_result WHERE dt = '@date' AND update_date = '@update_date' AND part = '@part'
    """.stripMargin

  /** ClickHouse insight query template; the placeholder is `@date`. */
  val ck_sql: String =
    """
      |SELECT device_id, country, age, gender, toString(install_apps) install_apps, toString(interest) interest FROM dwh.joypac_insight_daily_all WHERE dt = '@date'
    """.stripMargin

  /**
    * Left join of the `daily_joypac` view with the `daily_joypac_insight` view on the
    * upper-cased device id; missing insight columns default to 0 or '[]'.
    */
  val join_sql: String =
    """
      |SELECT a.device_id device_id, a.platform platform, a.all_apps all_apps, COALESCE(b.age,0) age, COALESCE(b.gender,0) gender, COALESCE(b.interest,'[]') interest, COALESCE(b.install_apps,'[]') install_apps
      | FROM daily_joypac a LEFT JOIN daily_joypac_insight b ON UPPER(a.device_id) = UPPER(b.device_id)
    """.stripMargin

  /**
    * Entry point: creates the job and runs it with the given arguments.
    * The status code returned by `run` is not inspected.
    */
  def main(args: Array[String]): Unit = {
    val job = new JoypacResultDaily()
    job.run(args)
  }
}