package mobvista.dmp.datasource.app_info_tag

import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.{BasicParser, Options}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

/**
  * Spark job that reads one day of `dwh.dim_app_info_adr` and rewrites the first nine
  * columns of every row as an ORC dataset with an all-string schema.
  *
  * Command-line options:
  *  - `-date`   value compared against `concat(year,month,day)` of the source table
  *  - `-output` path the ORC result is written to (any existing content is overwritten)
  */
class Etl_app_info_adr_v2 extends CommonSparkJob with Serializable {

  /** Declares the command-line options understood by this job (`date`, `output`). */
  def commandOptions(): Options = {
    val options = new Options()
    options.addOption("date", true, "date")
    options.addOption("output", true, "output")
    options
  }

  /**
    * Returns `value` unchanged when it is present and not blank.
    *
    * @param name  option name, used only for the error message
    * @param value raw option value; Commons CLI hands back `null` when the option is absent
    * @throws IllegalArgumentException if the value is `null` or blank
    */
  private def requireOptionValue(name: String, value: String): String =
    Option(value).filter(_.trim.nonEmpty).getOrElse(
      throw new IllegalArgumentException(s"Missing required option: -$name")
    )

  /**
    * Runs the export.
    *
    * @param args command-line arguments; must contain `-date` and `-output`
    * @return always `0`; failures surface as exceptions
    * @throws IllegalArgumentException if a required option is missing or `-date` contains
    *                                  a quote or backslash
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = new BasicParser().parse(commandOptions(), args)

    // Fail before Spark starts: a missing date would otherwise select zero rows and the
    // Overwrite below would replace the existing output with an empty dataset.
    val date = requireOptionValue("date", commandLine.getOptionValue("date"))
    val output = requireOptionValue("output", commandLine.getOptionValue("output"))

    // The date is embedded in a single-quoted SQL literal, so it must not be able to
    // terminate or escape out of that literal.
    require(
      !date.exists(c => c == '\'' || c == '\\'),
      s"Invalid -date value (quote or backslash not allowed): $date"
    )

    val spark = SparkSession
      .builder()
      .appName("Etl_app_info_adr_v2")
      .config("spark.rdd.compress", "true")
      .config("spark.shuffle.compress", "true")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.io.compression.lz4.blockSize", "64k")
      .config("spark.sql.autoBroadcastJoinThreshold", "209715200")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()

    try {
      val sql =
        s"""
           |select * from dwh.dim_app_info_adr where concat(year,month,day)='${date}'
        """.stripMargin

      // Output column names, in output order. Values are taken from the source row by
      // position, so this presumably mirrors the order of the table's leading columns
      // (TODO confirm against the table definition).
      val outputColumns = Seq(
        "package_name",
        "app_name",
        "category_list",
        "user_rating_count",
        "download",
        "track_view_url",
        "update_time",
        "artist_name",
        "content_advisory_rating"
      )
      val adr_schema = StructType(outputColumns.map(StructField(_, StringType, nullable = true)))

      // Plain Int local so the closure below does not capture the enclosing job instance.
      val columnCount = outputColumns.length

      // `select *` returns more than the payload (at least year/month/day are referenced
      // in the filter); keep only the leading `columnCount` values of each row.
      val adr_rdd = spark.sql(sql).rdd.map { p =>
        Row.fromSeq((0 until columnCount).map(i => p(i)))
      }

      val result = spark.createDataFrame(adr_rdd, adr_schema)
      // Write at most 10 output files, replacing whatever is already at `output`.
      result.coalesce(10).write.format("orc").mode(SaveMode.Overwrite).save(output)
    } finally {
      spark.stop()
    }
    0
  }
}

/** Launcher for the [[Etl_app_info_adr_v2]] job. */
object Etl_app_info_adr_v2 {

  /**
    * Creates a job instance and runs it with the given arguments.
    * The integer returned by `run` is not used.
    *
    * @param args command-line arguments, passed through to the job untouched
    */
  def main(args: Array[String]): Unit = {
    val job = new Etl_app_info_adr_v2()
    job.run(args)
  }
}