package mobvista.dmp.datasource.newtag


import mobvista.dmp.common.CommonSparkJob
import mobvista.prd.datasource.util.GsonUtil
import org.apache.commons.cli.Options
import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.{Row, SparkSession}

import java.util.Properties
import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer

/**
  * Builds the combined package-to-tag dimension under the new tag taxonomy.
  *
  * According to the original header, tags come from two sources: manually labelled packages
  * (`dwh.dim_package_tags`) and app-store derived MV tags (`dwh.dim_app_tag`) that are mapped
  * onto the new taxonomy.
  *
  * Flow implemented by this class:
  *   1. Select the packages of the `today` partition of `dwh.dim_app_tag` that have no row in
  *      `dwh.dim_package_tags` (anti-join), i.e. packages without a manual label.
  *   2. Explode their JSON tag lists and map each (first, second) MV tag pair to the new
  *      taxonomy through `dwh.dim_category_mv_new`.
  *   3. Union the mapped rows with `dwh.dim_package_tags` and overwrite
  *      `dwh.dim_package_tags_combine`.
  *   4. Refresh `dwh.adv_event_name_tab` from the MySQL table `adv_events`.
  *
  * NOTE(review): the original header also describes joining the result with the install list
  * to tag devices; that join is not performed anywhere in this class.
  */
class CalInterestTag extends CommonSparkJob with Serializable {

  /**
    * Job entry point: parses the command line, then runs both refresh steps inside one
    * Spark session that is always stopped afterwards.
    *
    * @param args command-line arguments; must contain the `today` option
    *             (presumably `yyyyMMdd`, since it is compared with `concat(year, month, day)`)
    * @return 1 when the mandatory-option check fails, 0 when the job completes; any failure
    *         inside the Spark work propagates as an exception after the session is stopped
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      1
    } else {
      printOptions(commandLine)
      val today = commandLine.getOptionValue("today")
      val spark = newTagSparkSession()
      try {
        writeCombinedPackageTags(spark, today)
        writeAdvEventNames(spark)
        0
      } finally {
        spark.stop()
      }
    }
  }

  /** Creates (or reuses) the Hive-enabled Spark session with this job's fixed settings. */
  private def newTagSparkSession(): SparkSession =
    SparkSession
      .builder()
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.autoBroadcastJoinThreshold", "268435456")
      .config("spark.sql.broadcastTimeout","1200")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()

  /**
    * Overwrites `dwh.dim_package_tags_combine` with the union of the mapped MV tags of
    * packages that have no row in `dwh.dim_package_tags` and the rows of
    * `dwh.dim_package_tags` itself.
    *
    * @param spark active session
    * @param today partition value matched against `concat(year, month, day)` of `dwh.dim_app_tag`
    */
  private def writeCombinedPackageTags(spark: SparkSession, today: String): Unit = {
    // Anti-join: keep only dim_app_tag packages that are absent from dim_package_tags.
    val unlabeledPackagesSql =
      s"""
           |SELECT
           |  /*+ MAPJOIN(b) */
           |  a.package_name,
           |  a.platform,
           |  a.tag
           |FROM (
           |       SELECT
           |         t.package_name,
           |         CASE WHEN t.platform = 'ios' THEN 'ios' ELSE 'android' END AS platform,
           |         t.tag
           |       FROM dwh.dim_app_tag t
           |       WHERE concat(t.year, t.month, t.day) = '${today}'
           |     ) a
           |LEFT OUTER JOIN dwh.dim_package_tags b
           |    ON a.package_name = b.package_name AND a.platform = b.platform
           |WHERE b.package_name IS NULL
        """.stripMargin

    // Provides the encoder needed by flatMap and the toDF() conversion.
    import spark.implicits._
    spark.sql(unlabeledPackagesSql)
      .flatMap(flatInterest)
      .toDF()
      .createOrReplaceTempView("t_app_two_tags")

    // Replace the original MV tags with the tags of the new taxonomy.
    val mappedTagsSql =
      """
          |select /*+ MAPJOIN(a) */ b.package_name, b.platform, b.tag_type, a.new_first_tag_1 as first_tag,
          |  a.new_second_tag_1 as second_tag
          |from dwh.dim_category_mv_new a
          |join t_app_two_tags b on a.mv_first_tag=b.first_tag and a.mv_second_tag=b.second_tag
          |group by  b.package_name, b.platform, b.tag_type, a.new_first_tag_1, a.new_second_tag_1
        """.stripMargin
    spark.sql(mappedTagsSql)
      .createOrReplaceTempView("t_app_two_tags_new")

    // `union` (not `union all`) also de-duplicates the combined rows.
    val combineSql =
      """insert overwrite table dwh.dim_package_tags_combine
           |select package_name, platform, tag_type, first_tag, second_tag
           |    from t_app_two_tags_new
           |    union
           |    select package_name, platform, tag_type,  first_tag, second_tag
           |    from dwh.dim_package_tags
        """.stripMargin
    spark.sql(combineSql)
  }

  /**
    * Overwrites `dwh.adv_event_name_tab` with the distinct lower-cased `event_name` values of
    * the MySQL table `adv_events` whose `adv_event_category_id` is 6 or 47.
    *
    * @param spark active session
    */
  private def writeAdvEventNames(spark: SparkSession): Unit = {
    // NOTE(review): database credentials are hard-coded in source control; they should be
    // moved to external configuration / a secret store and rotated.
    val properties = new Properties()
    properties.setProperty("user", "adnro")
    properties.setProperty("password", "YcM123glh")
    val url = "jdbc:mysql://adn-mysql-external.mobvista.com:3306/mob_adn"

    // Filter before projecting: the predicate column is not part of the selected output, so
    // applying it first avoids relying on the analyzer to re-add a dropped column.
    spark.read
      .jdbc(url, "adv_events", properties)
      .filter("adv_event_category_id in('6','47') ")
      .select("event_name")
      .createOrReplaceTempView("adv_event_name_tab_tmp")

    val refreshSql =
      "insert overwrite table dwh.adv_event_name_tab select distinct  lower(event_name) from adv_event_name_tab_tmp"
    spark.sql(refreshSql)
  }

  /**
    * Enables output compression on the given Hadoop configuration.
    *
    * NOTE(review): the `mapreduce.output.compression.codec` key is set to Gzip while the
    * `mapreduce.output.fileoutputformat.compress.codec` key is set to Snappy — confirm that
    * the mismatch is intended.
    *
    * @param conf configuration to mutate
    * @return the same `conf` instance, for chaining
    */
  def initConfig(conf: Configuration): Configuration = {
    conf.setBoolean("mapreduce.output.compress", true)
    conf.set("mapreduce.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec")
    conf.setBoolean("mapreduce.output.fileoutputformat.compress", true)
    conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec")
    conf.set("orc.compress", "ZLIB")
    conf
  }

  /**
    * Explodes one (package_name, platform, tag) row into one [[PackageTag]] per element of
    * the JSON array stored in the third column.
    *
    * Each array element is read as a JSON object whose key `"1"` holds the first-level tag
    * and whose optional key `"2"` holds the second-level tag. A missing or JSON-null `"2"`
    * yields an empty second tag. Key `"1"` is required; an element without it still fails.
    *
    * @param row row with string columns 0 = package name, 1 = platform, 2 = JSON tag array
    * @return one `PackageTag` of type `"category"` per array element, in array order
    */
  def flatInterest(row: Row): Array[PackageTag] = {
    val packageName = row.getString(0)
    val platform = row.getString(1)
    // The Java iterable is traversed through the file-level JavaConversions implicits.
    GsonUtil.String2JsonArray(row.getString(2))
      .map { element =>
        val obj = element.getAsJsonObject
        val firstTag = obj.get("1").getAsString
        // An explicit JSON null is a JsonNull whose getAsString throws; treat it as absent.
        val secondTag = Option(obj.get("2"))
          .filterNot(_.isJsonNull)
          .map(_.getAsString)
          .getOrElse("")
        PackageTag(packageName, platform, "category", firstTag, secondTag)
      }
      .toArray
  }

  /**
    * Declares the command-line options understood by this job.
    *
    * @return options containing the single argument-taking option `today`
    */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("today", true, "[must] today")
    options
  }
}

/** Command-line launcher for the [[CalInterestTag]] job. */
object CalInterestTag {

  /**
    * Runs the job and reports failure to the calling process.
    *
    * @param args command-line arguments forwarded unchanged to `CalInterestTag.run`
    */
  def main(args: Array[String]): Unit = {
    val exitCode = new CalInterestTag().run(args)
    // `run` signals failure through its return value; surface it as the process exit status
    // so a scheduler does not record a failed run as successful. On success, return normally.
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}