package mobvista.dmp.main

import com.fasterxml.jackson.databind.ObjectMapper
import mobvista.dmp.main.Constant._
import mobvista.dmp.util.{DateUtil, MRUtils}
import org.apache.commons.lang3.StringUtils
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SparkSession}

import java.util
import scala.collection.mutable

class BundleMatchMain extends BundleMatchNewJob {

  /**
    * Broadcast lookup table read by the UDFs of this class through `bmap.value`.
    * It is assigned from `bundleBC` at the start of `processData`, so the UDFs must not be
    * evaluated before that method has been called.
    */
  var bmap: Broadcast[scala.collection.Map[String, String]] = _

  /**
    * Builds the DataFrame for one business line.
    *
    * An unknown `business` value is not handled and fails with a `scala.MatchError`.
    *
    * @param business      pipeline selector: "3s", "adn_install", "adn_request_sdk" or "dsp"
    * @param date          reference day in `yyyyMMdd` form; used to derive the expiry date
    *                      (offset -30 passed to `DateUtil.getDay`) applied in the "adn_request_sdk" branch
    * @param input         path of the daily input (read as ORC, except "dsp" which is read as text)
    * @param output        not used by this method
    * @param oldUnmatch    path of the previously unmatched data; read only by "adn_request_sdk" (ORC)
    *                      and "dsp" (text)
    * @param unMatchOutput not used by this method
    * @param spark         active Spark session
    * @param bundleBC      broadcast lookup table; stored in `bmap` for use by the UDFs
    * @param coalesce      partition count passed to `repartition` in the "adn_request_sdk" branch
    * @return the processed rows; the "adn_request_sdk" and "dsp" branches add a boolean `flag` column
    */
  def processData(business: String, date: String, input: String, output: String, oldUnmatch: String, unMatchOutput: String, spark: SparkSession,
                  bundleBC: Broadcast[scala.collection.Map[String, String]], coalesce: Int): DataFrame = {
    val expire_date = DateUtil.format(DateUtil.getDay(date, "yyyyMMdd", -30), "yyyy-MM-dd")
    bmap = bundleBC
    import spark.implicits._
    business match {
      case "3s" =>
        spark.read.orc(input).rdd.map(row => {
          Tracking3S(row.getAs("device_id"), row.getAs("device_type"), row.getAs("platform"),
            row.getAs("package_name"), row.getAs("update_date"), row.getAs("country"))
        }).toDF
      case "adn_install" =>
        spark.read.orc(input).rdd.map(row => {
          AdnInstall(row.getAs("device_id"), row.getAs("device_type"), row.getAs("platform"),
            row.getAs("campaign_id"), row.getAs("package_name"))
        }).toDF
      case "adn_request_sdk" =>
        // Keep only old unmatched rows whose update_date is after the expiry date, then append
        // today's rows with blank package names replaced by a placeholder.
        val df = spark.read.orc(oldUnmatch)
          .filter(r => r.getAs[String]("update_date").compareTo(expire_date) > 0)
          .toDF()
          .union(spark.read.orc(input)
            .withColumn("package_name", getPkgName(col("platform"), col("package_name")))
          ).repartition(coalesce)
        // The flag column is reused for the rewrite so the check UDF is not spelled out twice.
        df.withColumn("flag", checkBundlePkgName(col("platform"), col("package_name")))
          .withColumn("package_name", when(col("flag"),
            getBundlePkgName(col("platform"), col("package_name"))).otherwise(col("package_name")))
      case "dsp" =>
        // Old unmatched rows may carry 13, 14 or 15 columns; missing trailing columns become "".
        // Any other column count fails the task with a MatchError.
        // coalesce is applied to the DataFrame that is actually used: DataFrames are immutable,
        // so calling it as a stand-alone statement has no effect.
        val unmatchData = spark.sparkContext.textFile(oldUnmatch).map(r => {
          val array = MRUtils.SPLITTER.split(r, -1)
          array.length match {
            case 13 | 14 | 15 =>
              val f = array.padTo(15, "")
              DspReq(f(0), f(1), f(2), f(3), f(4), f(5), f(6), f(7), f(8), f(9),
                f(10), f(11), f(12), f(13), f(14))
          }
        }).toDF.coalesce(2000)

        // limit -1 keeps trailing empty columns, so a row whose last column is empty still
        // yields 15 fields (same splitting as for the old unmatched data above).
        val dayData = spark.sparkContext.textFile(input).map(r => {
          val array = MRUtils.SPLITTER.split(r, -1)
          DspReq(array(0), array(1), array(2), array(3), array(4), array(5), array(6), array(7), array(8), array(9),
            array(10), array(11), array(12), array(13), array(14))
        }).toDF

        val oldNormalized = normalizePackages(unmatchData)
        val dayIosNormalized = normalizePackages(dayData.filter(col("platform").equalTo("ios")))
        // Android rows of the day are passed through untouched.
        val androidData = dayData.filter(col("platform").equalTo("android"))

        val outputDf = aggregatePerDevice(
          dayIosNormalized.filter(!isUnmatchedIosPackage).union(oldNormalized.filter(!isUnmatchedIosPackage)))
          .union(androidData).withColumn("flag", lit(true))

        val unMatchOutputDf = aggregatePerDevice(
          dayIosNormalized.filter(isUnmatchedIosPackage).union(oldNormalized.filter(isUnmatchedIosPackage)))
          .withColumn("flag", lit(false))

        outputDf.union(unMatchOutputDf)
    }
  }

  /** Turns the bracketed package list of each row into one row per package, mapped through `matchPackage`. */
  private def normalizePackages(df: DataFrame): DataFrame =
    df.withColumn("package_name", replacePkg(col("package_name")))
      .withColumn("package_name", explode(split(col("package_name"), ",")))
      .withColumn("package_name", matchPackage(col("platform"), col("package_name")))

  /** Predicate for ios rows whose package neither starts with "id" nor matches `packageRegex`. */
  private def isUnmatchedIosPackage: org.apache.spark.sql.Column =
    col("platform").equalTo("ios") && !col("package_name").startsWith("id") && !col("package_name").rlike(packageRegex)

  /** Collapses rows to one per (device_id, device_type), merging the package names with `combinePackage`. */
  private def aggregatePerDevice(df: DataFrame): DataFrame =
    df.groupBy("device_id", "device_type")
      .agg(first("platform").alias("platform"),
        first("country").alias("country"),
        first("ip").alias("ip"),
        first("gender").alias("gender"),
        first("birthday").alias("birthday"),
        first("maker").alias("maker"),
        first("model").alias("model"),
        first("os_version").alias("os_version"),
        combinePackage(collect_set("package_name")).alias("package_name"),
        first("androidids").alias("androidids"),
        first("datetime").alias("datetime"),
        first("segment_ids").alias("segment_ids"),
        first("region").alias("region"))

  /**
    * Shared resolution step of `checkBundlePkgName` and `getBundlePkgName`.
    *
    * For an ios package that does not match `packageRegex`: the name with "id" removed if that
    * matches the regex, otherwise the `bmap` entry for the original name (None when there is no entry).
    * Every other input is returned unchanged (None for a null name).
    */
  private def resolvePackage(platform: String, package_name: String): Option[String] =
    if ("ios".equals(platform) && package_name != null && !package_name.matches(packageRegex)) {
      // NOTE(review): String.replace removes every "id" occurrence, not only a leading one —
      // confirm whether stripping just the prefix was intended.
      val withoutId = package_name.replace("id", "")
      if (withoutId.matches(packageRegex)) Some(withoutId) else bmap.value.get(package_name)
    } else {
      Option(package_name)
    }

  /**
    * Strips the first and last character of a value that starts with "[".
    * Returns "" for null, for values not starting with "[", and for a lone "[".
    */
  val replacePkg = udf((package_name: String) => {
    if (package_name != null && package_name.length >= 2 && package_name.startsWith("[")) {
      package_name.substring(1, package_name.length - 1)
    } else {
      ""
    }
  })

  /**
    * Strips the first and last character of a package token and, for ios tokens that neither
    * start with "id" nor match `packageRegex`, replaces the result with its `bmap` entry if any.
    * Null tokens and tokens shorter than two characters yield "" instead of throwing.
    */
  val matchPackage = udf((platform: String, package_name: String) => {
    if (package_name == null || package_name.length < 2) {
      ""
    } else {
      val stripped = package_name.substring(1, package_name.length - 1)
      // NOTE(review): the "id"/regex checks run on the raw token, before its first and last
      // characters are stripped — confirm this is intended.
      if ("ios".equals(platform) && !package_name.startsWith("id") && !package_name.matches(packageRegex)) {
        bmap.value.getOrElse(stripped, stripped)
      } else {
        stripped
      }
    }
  })

  /**
    * Merges the comma-separated package strings of a group into one de-duplicated set and
    * serialises it with Jackson; returns "[]" when no package is left.
    */
  val combinePackage = udf((pkgsArrays: mutable.WrappedArray[String]) => {
    val pkgSet: util.Set[String] = new util.HashSet[String]()
    if (pkgsArrays != null) {
      for {
        pkgs <- pkgsArrays
        if StringUtils.isNotBlank(pkgs)
        pkgname <- pkgs.split(",", -1)
      } pkgSet.add(pkgname)
    }
    if (pkgSet.isEmpty) "[]" else new ObjectMapper().writeValueAsString(pkgSet)
  })


  /**
    * True when the package resolves (see `resolvePackage`) to a non-blank name that is neither
    * of the placeholders "0000000000" and "com.nonepkg.nonepkg".
    */
  val checkBundlePkgName = udf((platform: String, package_name: String) => {
    resolvePackage(platform, package_name).exists { resolved =>
      StringUtils.isNotBlank(resolved) && !resolved.equals("0000000000") && !resolved.equals("com.nonepkg.nonepkg")
    }
  })

  /** Returns the resolved package name (see `resolvePackage`), or the input unchanged when it cannot be resolved. */
  val getBundlePkgName = udf((platform: String, package_name: String) => {
    resolvePackage(platform, package_name).getOrElse(package_name)
  })

  /**
    * Returns the package name unchanged when it is not blank; otherwise a placeholder:
    * "0000000000" for ios and "com.nonepkg.nonepkg" for every other (or null) platform.
    */
  val getPkgName = udf((platform: String, package_name: String) => {
    if (StringUtils.isNotBlank(package_name)) package_name
    else if ("ios".equals(platform)) "0000000000"
    else "com.nonepkg.nonepkg"
  })
}

/** Command-line entry point for the bundle-match job. */
object BundleMatchMain {

  /**
    * Creates a fresh job instance and hands it the raw command-line arguments.
    *
    * @param args arguments forwarded unchanged to the job's `run` method
    */
  def main(args: Array[String]): Unit = {
    val job = new BundleMatchMain
    job.run(args)
  }
}
