package mobvista.dmp.main

import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.Options
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{SaveMode, SparkSession}

import java.net.URI

/**
 * Spark job that adds packages the crawler could not fetch to the package blacklist.
 *
 * A package name from the to-crawl file is treated as "not fetched" when it has no match in
 * the union of the iOS daily file, the Android daily file and `dwh.dim_package_tags`.
 * Those names are merged with the `dev.dm_package_black_list` partition for the given date,
 * de-duplicated, and written to `output` as zlib-compressed ORC.
 */
class PackageBlackList extends CommonSparkJob with Serializable {

  /**
   * Parses the command line, builds the blacklist and writes it to the output path.
   *
   * @param args command-line arguments; see [[buildOptions]] for the accepted options
   * @return 1 when the required options are missing (usage is printed), 0 after a successful write
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      1
    } else {
      printOptions(commandLine)

      val iosDailyPath = commandLine.getOptionValue("iosDailyPath")
      val adrDailyPath = commandLine.getOptionValue("adrDailyPath")
      val toCrawlerPath = commandLine.getOptionValue("toCrawlerPath")
      val output = commandLine.getOptionValue("output")
      val date = commandLine.getOptionValue("date")

      val spark = SparkSession.builder()
        .appName("dmp_PackageBlackList_fengliang")
        .config("spark.rdd.compress", "true")
        .config("spark.speculation", "true")
        .config("spark.speculation.quantile", "0.8")
        .config("spark.speculation.multiplier", "1")
        // The key used to start with a space, so it did not match the real setting name
        // and the codec was never applied.
        .config("spark.io.compression.codec", "org.apache.spark.io.LZFCompressionCodec")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .enableHiveSupport()
        .getOrCreate()
      import spark.implicits._
      val sc = spark.sparkContext

      // Everything after session creation runs inside try/finally so the session is
      // stopped even when the output cleanup or the job itself fails.
      try {
        // Resolve the file system from the output path itself instead of a hard-coded
        // bucket, so the cleanup targets the same location the result is written to.
        val outputPath = new Path(output)
        outputPath.getFileSystem(sc.hadoopConfiguration).delete(outputPath, true)

        // Only the first element returned by splitFun is used from each input line.
        val iosDailyRDD = sc.textFile(iosDailyPath).map(splitFun(_)(0))
        val adrDailyRDD = sc.textFile(adrDailyPath).map(splitFun(_)(0))

        // NOTE(review): names from dim_package_tags are lower-cased here, while names read
        // from the files are compared as-is; confirm whether a case-insensitive match is intended.
        val packageRDD = spark.sql(
          """
            |SELECT LOWER(package_name) package_name FROM dwh.dim_package_tags GROUP BY LOWER(package_name)
            |""".stripMargin)
          .rdd
          .map(_.getAs[String]("package_name"))

        // All package names that are already known.
        iosDailyRDD.union(adrDailyRDD).union(packageRDD)
          .map(PackageVO(_))
          .toDF()
          .createOrReplaceTempView("t_package_daily")

        // Package names that were handed to the crawler.
        sc.textFile(toCrawlerPath)
          .map(line => PackageVO(splitFun(line)(0)))
          .toDF()
          .createOrReplaceTempView("t_to_crawler")

        // Anti-join: to-crawl names with no match among the known packages, merged with the
        // existing blacklist partition and de-duplicated by the outer GROUP BY.
        val sql =
          s"""
             |select t.package_name
             |from (
             |select a.package_name
             |from t_to_crawler a
             |left outer join t_package_daily b on a.package_name=b.package_name
             |where b.package_name is null
             |union all
             |select package_name
             |from dev.dm_package_black_list
             |where dt='${date}'
             |) t
             |group by t.package_name
          """.stripMargin

        spark.sql(sql)
          .write
          .option("orc.compress", "zlib")
          .mode(SaveMode.Overwrite)
          .orc(output)
        0
      } finally {
        spark.stop()
      }
    }
  }

  /**
   * Declares the command-line options read by [[run]]. Every option takes a value.
   *
   * @return the option set containing `iosDailyPath`, `output`, `adrDailyPath`,
   *         `toCrawlerPath` and `date`
   */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("iosDailyPath", true, "[must] ios.txt path")
    options.addOption("output", true, "[must] output path")
    options.addOption("adrDailyPath", true, "[must] adr.txt path")
    options.addOption("toCrawlerPath", true, "[must] to_crawler_package_name.txt path")
    options.addOption("date", true, "[must] date ")
    options
  }
}

/** Command-line entry point for the [[PackageBlackList]] job. */
object PackageBlackList {

  /**
   * Instantiates the job and runs it with the given arguments.
   * The status code returned by `run` is discarded.
   *
   * @param args command-line arguments forwarded unchanged to the job
   */
  def main(args: Array[String]): Unit = {
    val job = new PackageBlackList()
    job.run(args)
    ()
  }
}

/**
 * Single-field record holding one package name.
 *
 * The field keeps its snake_case name because the SQL in this file refers to it as the
 * `package_name` column of the temp views built from this class.
 *
 * @param package_name the package name
 */
case class PackageVO(
  package_name: String
)