package mobvista.dmp.datasource.dm

import java.net.URI
import mobvista.dmp.common.{CommonSparkJob, MobvistaConstant}
import org.apache.commons.cli.Options
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{SaveMode, SparkSession}

/**
  * Spark job that builds the device interest-tag dataset and writes it as ORC.
  *
  * Steps performed by [[run]]:
  *  1. parse the `date`, `ga_date`, `output` and `coalesce` options;
  *  2. register the `t_app_two_tags` temp view from `InterestTagConstant.app_tag_except_sql`;
  *  3. collect the result of `InterestTagConstant.tag_sql` into a broadcast map keyed by
  *     `"<package_name>-<platform>"`;
  *  4. run `InterestTagConstant.interest_sql`, pass every partition through
  *     `CustomerIteratorInterest` together with the broadcast map, and write the
  *     result to `output` as zlib-compressed ORC.
  *
  * @author wangjf
  */
class DmInterestTagAllV2 extends CommonSparkJob with Serializable {

  /**
    * Executes the job.
    *
    * @param args raw command-line arguments; see [[buildOptions]] for the accepted options
    * @return `1` when `checkMustOption` rejects the command line, `0` after a successful write
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      printOptions(commandLine)
      1
    } else {
      printOptions(commandLine)

      val date = commandLine.getOptionValue("date")
      val gaDate = commandLine.getOptionValue("ga_date")
      val output = commandLine.getOptionValue("output")
      // Parse up front so a malformed partition count fails before any Spark work starts.
      val partitions = commandLine.getOptionValue("coalesce").toInt

      val spark = SparkSession.builder()
        .appName(s"DmInterestTagAllV2.${date}")
        .config("spark.rdd.compress", "true")
        .config("spark.io.compression.codec", "lz4")
        .config("spark.io.compression.lz4.blockSize", "64k")
        .config("spark.sql.orc.filterPushdown", "true")
        .config("spark.sql.autoBroadcastJoinThreshold", "209715200")
        .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .config("spark.kryo.registrator", "mobvista.dmp.datasource.dm.MyRegisterKryo")
        .enableHiveSupport()
        .getOrCreate()

      // Everything after session creation runs inside try/finally so the session is
      // stopped even when the output cleanup or one of the queries fails.
      try {
        import spark.implicits._

        val sc = spark.sparkContext

        // Resolve the filesystem from the output location itself, so the directory that
        // is cleaned is the same one the ORC writer below targets.
        val outputPath = new Path(output)
        val outputUri: URI = outputPath.toUri
        FileSystem.get(outputUri, sc.hadoopConfiguration).delete(outputPath, true)

        spark.udf.register("str2Json", InterestTagConstant.str2Json _)
        spark.udf.register("check_deviceId", MobvistaConstant.checkDeviceId _)
        spark.udf.register("combineJson", InterestTagConstant.combineJsonArray _)

        // The temp view is registered for use by later SQL.
        // NOTE(review): presumably referenced by tag_sql / interest_sql - confirm in InterestTagConstant.
        val appTagSql = InterestTagConstant.app_tag_except_sql.replace("@date", date)
        spark.sql(appTagSql)
          .flatMap(InterestTagConstant.flatInterest)
          .toDF()
          .createOrReplaceTempView("t_app_two_tags")

        val tagSql = InterestTagConstant.tag_sql.replace("@str2Json", "str2Json")

        // "<package_name>-<platform>" -> tags, collected to the driver and broadcast.
        // The explicit type argument avoids leaving getAs's type parameter to be inferred
        // as Nothing when the result is only used as the receiver of toString.
        val tagsByApp = spark.sql(tagSql).rdd.map { r =>
          val key = r.getAs[Any]("package_name").toString + "-" + r.getAs[Any]("platform").toString
          (key, r.getAs[Any]("tags").toString)
        }.collectAsMap()
        val bMap = sc.broadcast(tagsByApp)

        val interestTagSql = InterestTagConstant.interest_sql.replace("@date", date)
          .replace("@ga_date", gaDate)
          .replace("@check_deviceId", "check_deviceId(device_id)")
          .replace("@combineJson", "combineJson")

        val df = spark.sql(interestTagSql)
          .map(r => {
            DmInterestTag(r.getAs("device_id"), r.getAs("device_type"), r.getAs("platform"), r.getAs("tags"))
          })
          .mapPartitions(v => new CustomerIteratorInterest(v, bMap))
          .toDF

        df.repartition(partitions).write.mode(SaveMode.Overwrite)
          .option("orc.compress", "zlib")
          .orc(output)

        0
      } finally {
        spark.stop()
      }
    }
  }

  /**
    * Declares the command-line options read by [[run]].
    *
    * @return options `date`, `ga_date`, `output` and `coalesce`, each taking one argument
    */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("date", true, "[must] date")
    options.addOption("ga_date", true, "[must] ga_date")
    options.addOption("output", true, "[must] output")
    options.addOption("coalesce", true, "[must] coalesce")
    options
  }
}

/**
  * Command-line entry point for the [[DmInterestTagAllV2]] job.
  */
object DmInterestTagAllV2 {

  /**
    * Runs the job and exits the JVM with the job's status when it is non-zero.
    *
    * @param args command-line arguments forwarded unchanged to `run`
    */
  def main(args: Array[String]): Unit = {
    val exitCode = new DmInterestTagAllV2().run(args)
    // run() returns 1 when the required options are missing; surface that to the
    // launcher instead of silently finishing with status 0.
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}