package mobvista.dmp.datasource.device

import java.net.URI

import mobvista.dmp.util.DateUtil
import org.apache.commons.cli.{BasicParser, Options}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

import scala.collection.mutable.ArrayBuffer

/**
  * Spark job that expands each device's packed install list into one row per
  * installed package and writes the result as zlib-compressed ORC.
  *
  * The input query is `Constant.dm_install_list_v2_sql` with its `@date`,
  * `@update_date` and `@business` placeholders substituted from the command line.
  *
  * @package: mobvista.dmp.datasource.device
  * @author: wangjf
  * @date: 2020/3/5
  * @time: 4:00 PM
  * @email: jinfeng.wang@mobvista.com
  * @phone: 152-1062-7698
  */
class FilterInstallDaily {
  /**
    * Declares the command-line options understood by the job.
    *
    * @return the `date`, `business` and `output` options, each taking one argument
    */
  def commandOptions(): Options = {
    val options = new Options()
    options.addOption("date", true, "date")
    options.addOption("business", true, "business")
    options.addOption("output", true, "output")
    options
  }

  /**
    * Parses the arguments, runs the install-list query and writes one ORC row
    * per (device, package) pair to `output`.
    *
    * Each `install_list` value is read as comma-separated `package#date` entries.
    * Entries that do not contain both parts (including an empty list) are skipped
    * instead of failing the whole job, and only package names accepted by
    * `Constant.iosPkgPtn` or `Constant.adrPkgPtn` are kept.
    *
    * @param args command-line arguments: `-date` (parsed as `yyyyMMdd`),
    *             `-business` and `-output`; all three are required
    * @throws IllegalArgumentException if a required option is missing
    */
  protected def run(args: Array[String]): Unit = {
    val commandLine = new BasicParser().parse(commandOptions(), args)

    // Fail fast with a clear message before a Spark session is started;
    // getOptionValue returns null for an option that was not supplied.
    def requiredOption(name: String): String =
      Option(commandLine.getOptionValue(name)).getOrElse(
        throw new IllegalArgumentException(s"Missing required option: -$name"))

    val date = requiredOption("date")
    val business = requiredOption("business")
    val output = requiredOption("output")

    val spark = SparkSession
      .builder()
      .appName(s"FilterInstallDaily.$business.$date")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()
    try {
      // Remove any previous output recursively before writing.
      // NOTE(review): the file system is resolved from the hard-coded bucket, so this
      // presumably expects `output` to live under s3://mob-emr-test -- confirm.
      FileSystem.get(new URI("s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration)
        .delete(new Path(output), true)

      val updateDate = DateUtil.format(DateUtil.parse(date, "yyyyMMdd"), "yyyy-MM-dd")
      val sql = Constant.dm_install_list_v2_sql.replace("@date", date)
        .replace("@update_date", updateDate)
        .replace("@business", business)

      // The closure below must not reference members of this (non-serializable)
      // class; it only uses row-local values and the `Constant` object.
      val installRows = spark.sql(sql).rdd.flatMap { row =>
        // Explicit [Any] avoids the type parameter being inferred as Nothing.
        val deviceId = row.getAs[Any]("device_id").toString
        val deviceType = row.getAs[Any]("device_type").toString
        val platform = row.getAs[Any]("platform").toString
        val installList = row.getAs[Any]("install_list").toString

        installList.split(",").iterator
          .map(_.split("#"))
          .collect {
            // Requires at least `package#date`; shorter (malformed or empty)
            // entries fall through and are dropped.
            case Array(packageName, installDate, _*)
              if Constant.iosPkgPtn.matcher(packageName).matches ||
                Constant.adrPkgPtn.matcher(packageName).matches =>
              Row(deviceId, deviceType, platform, packageName, installDate)
          }
      }

      spark.createDataFrame(installRows, Constant.dm_install_daily_schema)
        .write
        .mode(SaveMode.Overwrite)
        .option("orc.compress", "zlib")
        .orc(output)

    } finally {
      spark.stop()
    }
  }
}

/** Command-line entry point for the [[FilterInstallDaily]] job. */
object FilterInstallDaily {
  /**
    * Creates a job instance and runs it with the given arguments.
    *
    * @param args command-line arguments, forwarded unchanged to the job
    */
  def main(args: Array[String]): Unit = {
    val job = new FilterInstallDaily()
    job.run(args)
  }
}