package mobvista.dmp.datasource.dm

import com.alibaba.fastjson.JSONObject
import mobvista.dmp.common.CommonSparkJob
import mobvista.dmp.util.MRUtils
import org.apache.commons.cli.Options
import org.apache.spark.sql.functions._

import scala.collection.mutable

/**
  * @package: mobvista.dmp.datasource.dm
  * @author: wangjf
  * @date: 2020/5/12
  * @time: 4:04 PM
  * @email: jinfeng.wang@mobvista.com
  * @phone: 152-1062-7698
  */
class ValidateDmpInterest extends CommonSparkJob with Serializable {
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      printOptions(commandLine)
      return 1
    } else {
      printOptions(commandLine)
    }

    val input_old = commandLine.getOptionValue("input_old")
    val input_new = commandLine.getOptionValue("input_new")
    val output = commandLine.getOptionValue("output")

    val spark = mobvista.dmp.common.MobvistaConstant.createSparkSession("ValidateDmpInterest")
    val old_df = spark.read.orc(input_old)
    val new_df = spark.read.orc(input_new)
    val m_old = old_df.where("system LIKE '%M%'")
    val m_new = new_df.where("system LIKE '%M%'")

    /*
    val m_old_except_new = m_old.select(
      col("devid"),
      col("device_type"),
      col("region")
    ).distinct().alias("old").join(
      m_new.select(
        col("devid"),
        col("device_type"),
        col("region")
      ).distinct().alias("new"), Seq("devid", "region"), "left")
      .where("new.devid is null and new.region is null")
    println("m_old_except_new.count ==>> " + m_old_except_new.count())

    FileSystem.get(new URI(s"s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration).delete(new Path(output + "/m_old_except_new"), true)
    m_old_except_new.select(
      col("devid"),
      col("region"),
      col("old.device_type")
    ).rdd.saveAsTextFile(output + "/m_old_except_new")

    val m_new_except_old = m_new.select(
      col("devid"), col("region")
    ).distinct().except(m_old.select(
      col("devid"), col("region")
    ).distinct())

    println("m_new_except_old.count ==>> " + m_new_except_old.count())
    */

    val dsp_old = old_df.where("system LIKE '%DSP%'")
    val dsp_new = new_df.where("system LIKE '%DSP%'")

    /*
    val dsp_old_except_new = dsp_old.select(
      col("devid"),
      col("device_type")
    ).distinct().alias("old").join(dsp_new.select(
      col("devid"),
      col("device_type")
    ).distinct().alias("new"), Seq("devid"), "left")
      .where("new.devid is null")
    println("dsp_old_except_new.count ==>> " + dsp_old_except_new.count())

    FileSystem.get(new URI(s"s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration).delete(new Path(output + "/dsp_old_except_new"), true)
    dsp_old_except_new.select(
      col("devid"),
      col("old.device_type")
    ).rdd.saveAsTextFile(output + "/dsp_old_except_new")

    val dsp_new_except_old = dsp_new.select(
      col("devid")
    ).distinct().except(dsp_old.select(
      col("devid")
    ).distinct())
    println("dsp_new_except_old.count ==>> " + dsp_new_except_old.count())
    */
    m_old.select(col("devid"), getPkgSize(col("tags")).alias("size")).dropDuplicates("devid")
      .select(count(col("devid")).alias("uv"), sum(col("size")).alias("pkgs"))
      .rdd.foreach(r => {
      println("m_old.uv ==>> " + r.getAs[Int]("uv") + ", pkgs ==>> " + r.getAs[Int]("pkgs"))
    })

    m_new.select(col("devid"), getPkgSize(col("tags")).alias("size")).dropDuplicates("devid")
      .select(count(col("devid")).alias("uv"), sum(col("size")).alias("pkgs"))
      .rdd.foreach(r => {
      println("m_new.uv ==>> " + r.getAs[Int]("uv") + ", pkgs ==>> " + r.getAs[Int]("pkgs"))
    })


    dsp_old.select(col("devid"), getPkgSize(col("tags")).alias("tags")).dropDuplicates("devid")
      .select(count(col("devid")).alias("uv"), sum(col("tags")).alias("pkgs"))
      .rdd.foreach(r => {
      println("dsp_old.uv ==>> " + r.getAs[Int]("uv") + ", pkgs ==>> " + r.getAs[Int]("pkgs"))
    })

    dsp_new.select(col("devid"), getPkgSize(col("tags")).alias("tags")).dropDuplicates("devid")
      .select(count(col("devid")).alias("uv"), sum(col("tags")).alias("pkgs"))
      .rdd.foreach(r => {
      println("dsp_new.uv ==>> " + r.getAs[Int]("uv") + ", pkgs ==>> " + r.getAs[Int]("pkgs"))
    })

    /*
    FileSystem.get(new URI(s"s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration).delete(new Path(output + "/m_old_join_new_pkgs"), true)

    m_old.select(col("devid"), col("tags")).distinct().createOrReplaceTempView("old")
    m_new.select(col("devid"), col("tags")).distinct().sample(0.01).createOrReplaceTempView("new")
    spark.udf.register("parsePkgs", parsePkgs _)
    val sql =
      """
        |SELECT a.devid device_id, parsePkgs(a.tags,b.tags) tag
        | FROM old a LEFT JOIN new b
        | ON a.devid = b.devid
        | WHERE b.devid is not null AND a.tags is not null AND b.tags is not null
        |""".stripMargin
    spark.sql(sql)
      .write
      .mode(SaveMode.Overwrite)
      .option("orc.compress", "snappy")
      .orc(output + "/m_old_join_new_pkgs")
    */

    //.rdd.saveAsTextFile(output + "/m_old_join_new_pkgs")

    if (spark != null) {
      spark.stop()
    }
    0
  }

  val getPkgSize = udf((tags: String) => {
    val set: mutable.Set[String] = new mutable.HashSet[String]()
    import scala.collection.JavaConversions._
    mobvista.dmp.common.MobvistaConstant.String2JSONArray(tags).foreach(json => {
      if (json.asInstanceOf[JSONObject].keySet().contains("package_name")) {
        set.add(json.asInstanceOf[JSONObject].getString("package_name").toLowerCase)
      }
    })
    set.size
  })

  def parsePkgs(old_pkg: String, new_pkg: String): String = {
    val old_set: mutable.Set[String] = new mutable.HashSet[String]()
    import scala.collection.JavaConversions._
    mobvista.dmp.common.MobvistaConstant.String2JSONArray(old_pkg).foreach(json => {
      //  old_set += json.asInstanceOf[JSONObject].getString("package_name").toLowerCase
      if (json.asInstanceOf[JSONObject].keySet().contains("package_name")) {
        old_set.add(json.asInstanceOf[JSONObject].getString("package_name").toLowerCase)
      }
    })

    val new_set: mutable.Set[String] = new mutable.HashSet[String]()
    mobvista.dmp.common.MobvistaConstant.String2JSONArray(new_pkg).foreach(json => {
      //  new_set + json.asInstanceOf[JSONObject].getString("package_name").toLowerCase
      new_set.add(json.asInstanceOf[JSONObject].getString("package_name").toLowerCase)
    })

    val set: mutable.Set[String] = new mutable.HashSet[String]()
    for (elem <- old_set) {
      if (!new_set.contains(elem)) {
        //  set += elem
        set.add(elem)
      }
    }
    val t_set: mutable.Set[String] = new mutable.HashSet[String]()
    for (elem <- new_set) {
      if (!old_set.contains(elem)) {
        t_set.add(elem)
      }
    }
    MRUtils.JOINER.join(set.mkString(","), old_set.size.toString, new_set.size.toString, set.size.toString, t_set.size.toString)
  }

  val parsePkg = udf((old_pkg: String, new_pkg: String) => {

    val old_set: mutable.Set[String] = new mutable.HashSet[String]()
    import scala.collection.JavaConversions._
    mobvista.dmp.common.MobvistaConstant.String2JSONArray(old_pkg).foreach(json => {
      //  old_set += json.asInstanceOf[JSONObject].getString("package_name").toLowerCase
      old_set.add(json.asInstanceOf[JSONObject].getString("package_name").toLowerCase)
    })

    val new_set: mutable.Set[String] = new mutable.HashSet[String]()
    mobvista.dmp.common.MobvistaConstant.String2JSONArray(new_pkg).foreach(json => {
      //  new_set + json.asInstanceOf[JSONObject].getString("package_name").toLowerCase
      new_set.add(json.asInstanceOf[JSONObject].getString("package_name").toLowerCase)
    })

    val set: mutable.Set[String] = new mutable.HashSet[String]()
    for (elem <- old_set) {
      if (!new_set.contains(elem)) {
        //  set += elem
        set.add(elem)
      }
    }
    MRUtils.JOINER.join(set.mkString(","), old_set.size.toString, new_set.size.toString, set.size.toString)
  })

  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("input_old", true, "[must] input_old")
    options.addOption("input_new", true, "[must] input_new")
    options.addOption("output", true, "[must] output")
    options
  }
}

object ValidateDmpInterest {
  def main(args: Array[String]): Unit = {
    new ValidateDmpInterest().run(args)
  }
}