package mobvista.dmp.datasource.event_tag

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.MobvistaSparkHadoopUtil
import org.apache.spark.sql._
import org.apache.spark.sql.types._

/**
  * Author: 刘凯, 2017-12-18 15:20
  * Computes 3s events.
  */
object Event_tag {

  /**
    * Job entry point.
    *
    * Reads the following Spark configuration keys:
    *  - `spark.app.loadTime`: load date, `yyyyMMdd`
    *  - `spark.app.db_name` / `spark.app.table`: target Hive database and table
    *
    * Steps:
    *  1. Load the package/category dimension from a tab-separated text file.
    *  2. Load the event definitions for the load date and keep the distinct
    *     rows whose scope is `custom`.
    *  3. Inner-join the definitions with `dwh.dmp_3s_event_source` for that day
    *     (on event name and offer id = campaign id), then left-join the
    *     package dimension on campaign id = uuid.
    *  4. Derive tag type / tag name / device type per row, aggregate per
    *     device and tag, and write parquet partitioned by `tag_type`.
    *  5. Register every written `tag_type` directory as a Hive partition and
    *     refresh the table.
    *
    * @param args unused; all inputs come from the Spark configuration
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .enableHiveSupport()
      .getOrCreate()

    // Load date in yyyyMMdd form; it is split below to build the input path.
    val loadTime = spark.conf.get("spark.app.loadTime")
    require(loadTime.length >= 8, s"spark.app.loadTime must be in yyyyMMdd format, got '$loadTime'")
    val year = loadTime.substring(0, 4)
    val month = loadTime.substring(4, 6)
    val day = loadTime.substring(6, 8)
    val dbName = spark.conf.get("spark.app.db_name")
    val tableName = spark.conf.get("spark.app.table")

    val outputPath = s"s3://mob-emr-test/dataplatform/DataWareHouse/data/$dbName/$tableName"
    val categoriesPath = "s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_categories_package_3s/tmp/categories.txt"
    val eventTypes = spark.sparkContext.broadcast(new EventTypeUtils())

    // ---- package / category dimension ----
    val categoryRows = spark.sparkContext.textFile(categoriesPath)
      // Drops every line containing "catego" (presumably the header line).
      .filter(line => !line.contains("catego"))
      .map(_.split("\t"))
      .filter(_.length > 4)
      .map(f => Row(f(0).toInt, f(1), f(2), f(3), f(4)))
    val categoriesSchema = StructType(Array(
      StructField("uuid", IntegerType, true),
      StructField("uuid_name", StringType, true),
      StructField("platform", StringType, true),
      StructField("package_name", StringType, true),
      StructField("categories", StringType, true)))
    val packageDf = spark.createDataFrame(categoryRows, categoriesSchema)

    // ---- event definitions ----
    val defineRows = spark.sparkContext
      .textFile(s"s3://trackingcsv-3s/trackingcsv/event_define/$year/$month/$day/*.csv")
      .map(_.split("\t"))
      // Field index 7 is read below, so a row needs at least 8 fields.
      .filter(_.length > 7)
      .map(f => Row(f(1), f(2), f(4), f(7)))
    val defineSchema = StructType(Array(
      StructField("event_name", StringType, true),
      StructField("scope", StringType, true),
      StructField("event_type", StringType, true),
      StructField("offer_id", StringType, true)))
    val eventDefineDf = spark.createDataFrame(defineRows, defineSchema)
      .filter("scope='custom'")
      .distinct()

    // ---- raw events of the load day ----
    val eventInfoDf = spark
      .sql(s"select device_id,platform,event_name,mobvista_clickid,campaign_id,country,event_day,day from dwh.dmp_3s_event_source  where day='$loadTime'")
      .select("device_id", "platform", "event_name", "mobvista_clickid", "country", "event_day", "campaign_id", "day")
      .distinct()

    // ---- joins ----
    val definedEvents = eventDefineDf.join(
      eventInfoDf,
      eventDefineDf.col("event_name") === eventInfoDf.col("event_name") and
        eventDefineDf.col("offer_id") === eventInfoDf.col("campaign_id"),
      "inner")

    // Left join: events without a known package keep null package_name/categories,
    // which are then replaced by "-".
    val enrichedDf = definedEvents
      .join(packageDf, eventInfoDf.col("campaign_id") === packageDf.col("uuid"), "left")
      .select(
        definedEvents.col("device_id"),
        definedEvents.col("platform"),
        definedEvents.col("country"),
        definedEvents.col("event_day"),
        definedEvents.col("event_type"),
        definedEvents.col("campaign_id"),
        packageDf.col("package_name"),
        packageDf.col("categories"),
        definedEvents.col("day"))
      .distinct()
      .na.fill(Map("package_name" -> "-", "categories" -> "-"))

    // ---- tagging ----
    val taggedRows = enrichedDf.rdd.map(row => tagRow(row, eventTypes.value))

    val resultSchema = StructType(Array(
      StructField("device_id", StringType, true),
      StructField("country", StringType, true),
      StructField("device_type", StringType, true),
      StructField("platform", StringType, true),
      StructField("tag_type", StringType, true),
      StructField("tag_name", StringType, true),
      StructField("tag_value", IntegerType, true),
      StructField("package_name", StringType, true),
      StructField("event_day", StringType, true),
      StructField("day", StringType, true)))

    // ---- aggregation ----
    spark.createDataFrame(taggedRows, resultSchema).createOrReplaceTempView("res_table")
    val aggregatedDf = spark.sql("select device_id,country,device_type,platform,tag_type,tag_name,sum(tag_value) tag_sum,concat_ws(',',collect_set(package_name)) as pack_sum,day from res_table where tag_value>0 group by device_id,country,device_type,platform,tag_type,tag_name,day")
    val resultDf = spark.createDataFrame(aggregatedDf.rdd.map(row => operLineResult(row)), resultSchema)

    // ---- write ----
    val partitionColumns = "tag_type".split(",")
    val tagSource = "3s"
    val dayOutputPath = outputPath + "/day=" + loadTime + "/tag_source=" + tagSource
    resultDf
      .sortWithinPartitions(partitionColumns.map(new Column(_)): _*)
      .write
      .mode(SaveMode.Overwrite)
      .format("parquet")
      .partitionBy(partitionColumns: _*)
      .save(dayOutputPath)

    // ---- register the written directories as Hive partitions ----
    MobvistaSparkHadoopUtil.sparkHadoopUtil.globPath(new Path(dayOutputPath + "/tag_type=*")).foreach { path =>
      // Path relative to the table root, e.g. "/day=.../tag_source=.../tag_type=...".
      val relative = path.toString.replace(outputPath, "")
      val segments = relative.substring(relative.indexOf("/") + 1).split("/")

      // Value of the last "<key>=<value>" segment starting with `key`, or "" if absent.
      def partitionValue(key: String): String =
        segments.filter(_.startsWith(key)).lastOption.map(_.substring(key.length + 1)).getOrElse("")

      val partDay = partitionValue("day")
      val partTagType = partitionValue("tag_type")
      val partTagSource = partitionValue("tag_source")
      if (partDay.nonEmpty && partTagSource.nonEmpty && partTagType.nonEmpty) {
        spark.sql(s"alter table ${dbName}.${tableName} add IF NOT EXISTS partition(day='${partDay}',tag_source='${partTagSource}',tag_type='${partTagType}')")
      }
    }
    spark.sql("refresh table " + dbName + "." + tableName)
    spark.stop()
  }

  /**
    * Derives the tag columns for one joined event row.
    *
    * Expected input column order (as selected in `main`): device_id, platform,
    * country, event_day, event_type, campaign_id, package_name, categories, day.
    *
    * @param row        joined event row; `categories` must be non-null
    *                   (`main` fills missing values with "-")
    * @param eventTypes event-type lists used to pick the tag
    * @return row of (device_id, country, device_type, platform, tag_type,
    *         tag_name, tag_value, package_name, event_day, day)
    */
  private def tagRow(row: Row, eventTypes: EventTypeUtils): Row = {
    val deviceId = row.getString(0)
    val country = row.getString(2)
    val eventDay = row.getString(3)
    val eventType = row.getString(4)
    val packageName = row.getString(6)
    val categories = row.getString(7)
    val day = row.getString(8)

    val (tagType, tagName, tagValue) =
      if (eventTypes.shopping_list.contains(eventType)) {
        if (categories == "Shopping") {
          // "Add to Cart" takes precedence when both markers are present.
          if (eventType.contains("Add to Cart")) ("shopping", "mv_add_to_Cart", 1)
          else if (eventType.contains("Add To Wish")) ("shopping", "mv_add_to_wish", 1)
          // tag_value 0: such rows are dropped by the `tag_value>0` filter in main.
          else ("shopping", "-", 0)
        } else {
          ("other_event", "other_event", 1)
        }
      } else if (eventTypes.purchase_list.contains(eventType)) {
        val purchaseName = categories match {
          case "Games"    => "game_purchase"
          case "Shopping" => "shopping_purchase"
          case _          => "other_purchase"
        }
        ("purchase", purchaseName, 1)
      } else if (eventTypes.other_list.contains(eventType)) {
        ("other_event", "other_event", 1)
      } else {
        ("-", "-", 0)
      }

    // Null or empty platform becomes "-", which also yields device type "-".
    val platform = Option(row.getString(1)).filter(_.nonEmpty).getOrElse("-")
    val platformLower = platform.toLowerCase()
    val deviceType =
      if (platformLower.contains("android")) "gaid"
      else if (platformLower.contains("ios")) "idfa"
      else "-"

    Row(
      deviceId,
      country,
      deviceType,
      platform,
      tagType,
      tagName,
      tagValue,
      packageName,
      eventDay,
      day)
  }

  /**
    * Reshapes one aggregated row into the 10-column result layout.
    *
    * Input columns by index: 0-5 strings (device_id, country, device_type,
    * platform, tag_type, tag_name), 6 = summed tag value (Long),
    * 7 = comma-joined package names, 8 = day.
    *
    * @param row aggregated row produced by the group-by query in `main`
    * @return row with the sum narrowed to Int; `day` fills both of the last
    *         two positions because the aggregation has no `event_day` column
    */
  def operLineResult(row: Row): Row = {
    val tagSum = row.getLong(6)
    val packSum = row.getString(7)
    val day = row.getString(8)
    Row(
      row.getString(0),
      row.getString(1),
      row.getString(2),
      row.getString(3),
      row.getString(4),
      row.getString(5),
      tagSum.toInt,
      packSum,
      day,
      day)
  }
}