package mobvista.dmp.datasource.dm

import java.net.URI

import com.alibaba.fastjson.JSON
import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.Options
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{SaveMode, SparkSession}

import scala.collection.mutable

/**
 * Spark job that runs the query `Constant.dmp_device_tag_statistics_sql` for one
 * date and writes the result to the output path as zlib-compressed ORC.
 *
 * Command-line options (declared in `buildOptions`):
 *  - `date`: text substituted for the `@date` placeholder in the query
 *  - `output`: destination path; it is deleted recursively before writing
 *  - `coalesce`: number of partitions the result is repartitioned into
 *
 * @package: mobvista.dmp.datasource.dm
 * @author: wangjf
 * @date: 2018/12/20
 * @time: 10:28 AM
 * @email: jinfeng.wang@mobvista.com
 * @phone: 152-1062-7698
 */
class DmDeviceTagStatistics extends CommonSparkJob with java.io.Serializable {

  import scala.collection.JavaConverters._

  /**
   * Parses the command line, runs the statistics query and writes the result.
   *
   * @param args raw command-line arguments
   * @return 1 when the mandatory-option check fails, 0 after a successful write
   * @throws NumberFormatException    if `coalesce` is not an integer
   * @throws IllegalArgumentException if `coalesce` is not positive
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)

    if (!checkMustOption(commandLine)) {
      printUsage(options)
      printOptions(commandLine)
      1
    } else {
      printOptions(commandLine)

      val date = commandLine.getOptionValue("date")
      val output = commandLine.getOptionValue("output")

      // Validate the partition count up front: a malformed value must fail
      // before the session is started and, above all, before the existing
      // output directory is deleted.
      val partitions = commandLine.getOptionValue("coalesce").toInt
      require(partitions > 0, s"coalesce must be a positive integer, got $partitions")

      val spark = SparkSession.builder()
        .appName("DmDeviceTagStatistics")
        .config("spark.rdd.compress", "true")
        .config("spark.io.compression.codec", "snappy")
        .config("spark.sql.orc.filterPushdown", "true")
        .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .enableHiveSupport()
        .getOrCreate()

      try {
        // The query refers to this function by its registered name.
        spark.udf.register("caculateTagCnt", caculateTagCnt _)

        val sql = Constant.dmp_device_tag_statistics_sql.replace("@date", date)

        // Resolve the filesystem from the output path itself so the delete
        // always targets the same location the ORC writer is about to use,
        // whatever bucket or scheme the caller passed in.
        val outputPath = new Path(output)
        val fs: FileSystem = outputPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
        fs.delete(outputPath, true)

        spark.sql(sql)
          .repartition(partitions)
          .write
          .mode(SaveMode.Overwrite)
          .option("orc.compress", "zlib")
          .orc(output)
      } finally {
        spark.stop()
      }
      0
    }
  }

  /**
   * Declares the command-line options this job accepts.
   *
   * @return the options `date`, `output` and `coalesce`, each taking a value
   */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("date", true, "[must] date")
    options.addOption("output", true, "[must] output path")
    options.addOption("coalesce", true, "[must] coalesce")
    options
  }

  /**
   * Counts how often each tag occurs in a comma-separated list and renders
   * the counts as a JSON object string (tag -> occurrences).
   *
   * The list is split with limit -1, so empty segments (for example the one
   * produced by a trailing comma, or by an empty input string) are counted
   * under the empty-string key.
   *
   * @param tags comma-separated tags; may be null when the SQL value is NULL
   * @return JSON object string of the counts, or "{}" when `tags` is null
   */
  def caculateTagCnt(tags: String): String = {
    if (tags == null) {
      // A NULL column value has no tags to count; report an empty object
      // instead of failing the task with a NullPointerException.
      "{}"
    } else {
      val counts = mutable.HashMap.empty[String, Int]
      tags.split(",", -1).foreach { tag =>
        counts.update(tag, counts.getOrElse(tag, 0) + 1)
      }
      JSON.toJSON(counts.asJava).toString
    }
  }
}

/** Command-line launcher for the [[DmDeviceTagStatistics]] job. */
object DmDeviceTagStatistics {

  /**
   * Runs the job and propagates a non-zero result as the process exit status,
   * so that a run rejected for bad arguments is not reported as a success.
   *
   * @param args raw command-line arguments, forwarded to the job unchanged
   */
  def main(args: Array[String]): Unit = {
    val exitCode = new DmDeviceTagStatistics().run(args)
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}