package mobvista.dmp.main

import java.net.URI

import mobvista.prd.datasource.util.GsonUtil
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer

/**
  * Finds the device IDs that have installed a package belonging to a given
  * game sub-interest (second-level "Games" tag).
  */
class PkgInstallStatistics extends Serializable {
  /** Field delimiter used by both input data sets and by the output lines. */
  val dataSplit = "\t"

  /**
    * Runs the job: collects the packages whose tags match `secondTag` under "Games",
    * then writes the distinct `device_id<TAB>device_type` pairs of install-list rows
    * that contain one of those packages in an entry mentioning `date`.
    *
    * @param args positional arguments: `date`, `date_path`, `secondTag`, `output`
    * @return always 0; failures surface as exceptions
    * @throws IllegalArgumentException if fewer than four arguments are supplied
    */
  def run (args : Array[String]) : Int = {
    // Fail fast, before a SparkContext is started, instead of hitting an
    // ArrayIndexOutOfBoundsException in the middle of the job.
    require(args != null && args.length >= 4,
      "Usage: PkgInstallStatistics <date> <date_path> <secondTag> <output>")

    val date = args(0)
    val date_path = args(1)
    val secondTag = args(2)
    val output = args(3)

    var sc : SparkContext = null
    try {
      val conf = new SparkConf().setAppName("PkgInstallStatistics")
      sc = new SparkContext(conf)

      // Sample of the tag column (taken from the original source comment):
      //   [{"1":"Games","id":"69"},{"1":"Games","2":"Simulation","id":"83"},{"1":"Games","2":"Family","id":"78"}]
      val appTag = sc.textFile("s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_app_tag/" + date_path)

      // Column 0 of every matching tag row is used as the lookup key; the value 1 is a placeholder.
      val packageData = appTag
        .filter(filterTags(_, secondTag))
        .map(x => (StringUtils.splitPreserveAllTokens(x, dataSplit, -1)(0), 1))
        .collectAsMap()
      val packageBC = sc.broadcast(packageData)

      // Remove any previous output so saveAsTextFile does not fail on an existing path.
      FileSystem.get(new URI(s"s3://mob-emr-test"), sc.hadoopConfiguration).delete(new Path(output),true)

      val installData = sc.textFile(s"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/$date_path/*")
      installData
        .filter(_.contains(date))
        .map(doTag(_, packageBC, date))
        // Drop non-matching rows (empty string) before the shuffle rather than after it.
        .filter(_.length > 0)
        // De-duplicate into 100 partitions, as the original reduceByKey(_ + _, 100) did.
        .distinct(100)
        .saveAsTextFile(output)
    } finally {
      if (sc != null) {
        sc.stop()
      }
    }
    0
  }

  /**
    * Maps one install-list row to `device_id<TAB>device_type` when the row qualifies.
    *
    * The row is split on [[dataSplit]]; column 3 is parsed as a JSON array. The row
    * qualifies if some array element's string form contains `date` and its
    * `package_name` is a key of the broadcast package map.
    *
    * @param record    one tab-separated line of the install list
    * @param packageBC broadcast map whose keys are the package names of interest
    * @param date      date string that must appear in the matching array element
    * @return columns 0 and 1 joined by [[dataSplit]], or the empty string if no element matches
    */
  def doTag(record : String, packageBC : Broadcast[scala.collection.Map[String, Int]], date : String): String = {
    val splits = StringUtils.splitPreserveAllTokens(record, dataSplit, -1)
    val jsonArray = GsonUtil.String2JsonArray(splits(3))

    // `exists` stops at the first match, like the original early return,
    // but without a non-local return from inside a lambda.
    val installedOnDate = jsonArray.exists { element =>
      element.toString.contains(date) && {
        val pkg = element.getAsJsonObject.get("package_name").getAsString
        packageBC.value.contains(pkg)
      }
    }

    if (installedOnDate) s"${splits(0)}$dataSplit${splits(1)}" else ""
  }

  /**
    * Tells whether a tag row belongs to the requested "Games" sub-category.
    *
    * Rows that do not contain the text "Games" are rejected without parsing. Otherwise
    * column 3 is parsed as a JSON array and the row is accepted if any element's
    * string form contains both `secondTag` and "Games".
    *
    * @param record    one tab-separated line of the app-tag dimension
    * @param secondTag text of the second-level tag to look for
    * @return true if some tag element mentions both `secondTag` and "Games"
    */
  def filterTags(record : String, secondTag : String): Boolean = {
    record.contains("Games") && {
      val splits = StringUtils.splitPreserveAllTokens(record, dataSplit, -1)
      val jsonArray = GsonUtil.String2JsonArray(splits(3))
      jsonArray.exists { element =>
        val str = element.toString
        str.contains(secondTag) && str.contains("Games")
      }
    }
  }
}

object PkgInstallStatistics {
  /**
    * Command-line entry point: builds a job instance and hands it the raw arguments.
    * The integer status returned by `run` is discarded.
    *
    * @param args command-line arguments, passed through unchanged
    */
  def main(args: Array[String]): Unit = {
    val job = new PkgInstallStatistics()
    job.run(args)
  }
}
