package mobvista.dmp.datasource.age_gender

import java.net.URI

import mobvista.dmp.common.CommonSparkJob
import mobvista.dmp.util.DateUtil
import org.apache.commons.cli.Options
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

/**
  * Spark job that merges the age and gender label sources with the DMP install list.
  *
  * Age labels (dsp + ga) and gender labels (dsp + ga + fb + tp) are each unioned,
  * exposed as temp views, joined through the SQL statements held in `Constant`,
  * and written out as zlib-compressed ORC.
  *
  * @package: mobvista.dmp.datasource.age_gender
  * @author: wangjf
  * @create: 2018-09-10 16:46
  **/
class MergeInstallAgeGender extends CommonSparkJob with Serializable {

  /**
    * Parses the command line, runs the age merge followed by the gender merge,
    * and reports a status code.
    *
    * @param args raw command-line arguments; every option declared in `buildOptions` is read
    * @return -1 when `checkMustOption` rejects the arguments, 0 once both outputs are written
    * @throws NumberFormatException if the `parallelism` option is not an integer
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)

      val dsp_gender_path = commandLine.getOptionValue("dsp_gender_path")
      val ga_gender_path = commandLine.getOptionValue("ga_gender_path")
      val fb_gender_path = commandLine.getOptionValue("fb_gender_path")
      val tp_gender_path = commandLine.getOptionValue("tp_gender_path")
      val gender_output = commandLine.getOptionValue("gender_output")

      val dsp_age_path = commandLine.getOptionValue("dsp_age_path")
      val ga_age_path = commandLine.getOptionValue("ga_age_path")
      val age_output = commandLine.getOptionValue("age_output")
      val date = commandLine.getOptionValue("date")
      val ga_date = commandLine.getOptionValue("ga_date")
      // Parse once, before any cluster work: a malformed value now fails before the
      // session is created and before age_output is deleted, instead of mid-job.
      val partitions = commandLine.getOptionValue("parallelism").toInt

      val spark = SparkSession.builder()
        .appName(s"MergeInstallAgeGender.$date")
        .config("spark.rdd.compress", "true")
        .config("spark.io.compression.codec", "snappy")
        .config("spark.sql.orc.filterPushdown", "true")
        .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .enableHiveSupport()
        .getOrCreate()

      try {
        // Kept inside the try so a failing delete still stops the session.
        // NOTE(review): the bucket is hard-coded, so this presumably assumes age_output
        // lives under s3://mob-emr-test; gender_output is not pre-deleted and relies on
        // SaveMode.Overwrite alone -- confirm both are intended.
        FileSystem.get(new URI("s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration)
          .delete(new Path(age_output), true)

        unionLabelSources(spark, dsp_age_path, ga_age_path).createOrReplaceTempView("t_age")

        spark.udf.register("pkg_keys", Logic.pkg_keys _)
        spark.udf.register("split_keys", Logic.split_keys _)

        // Presumably the day 91 days before `date` -- depends on DateUtil.getDay semantics.
        val update_date = DateUtil.format(DateUtil.getDay(date, "yyyyMMdd", -91), "yyyy-MM-dd")

        val installSql = Constant.dmp_install_list_sql.replace("@date", date)
          .replace("@ga_date", ga_date)
          .replace("@update_date", update_date)

        // Persisted before registration; presumably both join queries below read t_install.
        spark.sql(installSql).persist(StorageLevel.MEMORY_AND_DISK_SER)
          .createOrReplaceTempView("t_install")

        writeRepartitionedOrc(spark.sql(Constant.dmp_install_list_join_age_sql), partitions, age_output)

        unionLabelSources(spark, dsp_gender_path, ga_gender_path, fb_gender_path, tp_gender_path)
          .createOrReplaceTempView("t_gender")

        writeRepartitionedOrc(spark.sql(Constant.dmp_install_list_join_gender_sql), partitions, gender_output)
      } finally {
        // SparkSession.stop() also stops the underlying SparkContext, so one call suffices.
        spark.stop()
      }
      0
    }
  }

  /**
    * Reads each ORC path with `Constant.schema_age_gender` and unions the results
    * in argument order.
    *
    * @param spark active session used for reading
    * @param first first ORC path (required, so the union is never empty)
    * @param rest  remaining ORC paths, appended in order
    * @return the positional union of all sources
    */
  private def unionLabelSources(spark: SparkSession, first: String, rest: String*): org.apache.spark.sql.DataFrame = {
    def load(path: String): org.apache.spark.sql.DataFrame =
      spark.read.schema(Constant.schema_age_gender).orc(path)

    rest.foldLeft(load(first))((merged, path) => merged.union(load(path)))
  }

  /**
    * Repartitions `frame` and writes it as zlib-compressed ORC, overwriting `output`.
    *
    * @param frame      data to write
    * @param partitions target partition count passed to `repartition`
    * @param output     destination path
    */
  private def writeRepartitionedOrc(frame: org.apache.spark.sql.DataFrame, partitions: Int, output: String): Unit =
    frame.repartition(partitions)
      .write
      .mode(SaveMode.Overwrite)
      .option("orc.compress", "zlib")
      .orc(output)

  /**
    * Declares the command-line options of this job. Every option takes a value
    * and carries the description "[must] <name>".
    *
    * @return the populated option set
    */
  override protected def buildOptions(): Options = {
    val opts = new Options
    Seq(
      "dsp_gender_path",
      "ga_gender_path",
      "fb_gender_path",
      "tp_gender_path",
      "gender_output",
      "dsp_age_path",
      "ga_age_path",
      "age_output",
      "date",
      "ga_date",
      "parallelism"
    ).foreach(name => opts.addOption(name, true, s"[must] $name"))
    opts
  }
}

object MergeInstallAgeGender {

  /**
    * JVM entry point: runs the job with the given command-line arguments.
    *
    * The status returned by `run` used to be discarded, so a rejected command line
    * (status -1) still ended the process with exit code 0. A non-zero status is now
    * propagated as the process exit code; a zero status returns normally.
    *
    * @param args command-line arguments forwarded unchanged to `run`
    */
  def main(args: Array[String]): Unit = {
    val status = new MergeInstallAgeGender().run(args)
    if (status != 0) {
      sys.exit(status)
    }
  }
}