package mobvista.dmp.datasource.event_tag

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.MobvistaSparkHadoopUtil
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, SaveMode, SparkSession, _}

/**
 * Spark job that computes the GA purchase event tag for a single load day.
 *
 * The job reads the paying devices of the day from `dwh.ods_ga_device_daily`,
 * turns every aggregated row into a `purchase` / `game_purchase` tag row,
 * writes the result as parquet below
 * `<outputPath>/day=<loadTime>/tag_source=ga` (partitioned by `tag_type`) and
 * finally issues an `alter table ... add IF NOT EXISTS partition` statement on
 * `<db_name>.<table_name>` for every written `tag_type=*` directory.
 *
 * Spark configuration entries read by the job:
 *  - `spark.app.loadTime`: day to process, `yyyyMMdd`
 *  - `spark.app.table`: target table name
 *  - `spark.app.db_name`: target database name
 *
 * @author Liu Kai, 2017-12-18 15:20
 */
object Ga_purchase_event {

  /** Root directory under which `<db_name>/<table_name>` is written. */
  private val OutputRoot = "s3://mob-emr-test/dataplatform/DataWareHouse/data/"

  /** Value of the `tag_source` path segment / partition column. */
  private val TagSource = "ga"

  /** Value of the `tag_type` column of every produced row. */
  private val TagType = "purchase"

  /** Value of the `tag_name` column of every produced row. */
  private val TagName = "game_purchase"

  /** Columns used both for the in-partition sort and for `partitionBy`. */
  private val PartitionColumns = Seq("tag_type")

  /** Schema of the rows produced by [[operLineRdd]]; field order must match. */
  private val ResultSchema = StructType(Array(
    StructField("device_id", StringType, nullable = true),
    StructField("country", StringType, nullable = true),
    StructField("device_type", StringType, nullable = true),
    StructField("platform", StringType, nullable = true),
    StructField("tag_type", StringType, nullable = true),
    StructField("tag_name", StringType, nullable = true),
    StructField("tag_value", IntegerType, nullable = true),
    StructField("package_name", StringType, nullable = true),
    StructField("event_day", StringType, nullable = true),
    StructField("day", StringType, nullable = true)))

  /**
   * Job entry point.
   *
   * @param args command line arguments; not used, all inputs come from the
   *             Spark configuration
   * @throws NoSuchElementException   if a required configuration entry is missing
   * @throws IllegalArgumentException if `spark.app.loadTime` is shorter than 8 characters
   */
  def main(args: Array[String]): Unit = {
    // The serializer settings are core Spark properties, so they are supplied
    // to the builder instead of being set on the runtime conf of an already
    // created session.
    val spark = SparkSession.builder()
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.kryoserializer.buffer.max", "300m")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // yyyyMMdd
      val loadTime = spark.conf.get("spark.app.loadTime")
      // Fail fast on a truncated date instead of querying / writing a bogus day.
      require(loadTime.length >= 8,
        s"spark.app.loadTime must be a yyyyMMdd date, got '$loadTime'")
      val tableName = spark.conf.get("spark.app.table")
      val dbName = spark.conf.get("spark.app.db_name")
      val outputPath = OutputRoot + dbName + "/" + tableName
      val dayOutputPath = outputPath + "/day=" + loadTime + "/tag_source=" + TagSource

      // Output committer / YARN related settings for the parquet write.
      // NOTE(review): the two yarn.nodemanager.* keys look like NodeManager-side
      // settings; confirm they have any effect when set on the job configuration.
      val hadoopConf = spark.sparkContext.hadoopConfiguration
      hadoopConf.set("mapreduce.fileoutputcommitter.algorithm.version", "2")
      hadoopConf.set("yarn.nodemanager.pmem-check-enabled", "false")
      hadoopConf.set("yarn.nodemanager.vmem-check-enabled", "false")

      // One row per (device, device_type, platform, country, date) that paid on the load day.
      val purchaseSql = "select device_id,device_type,platform,country,date,sum(is_pay) from dwh.ods_ga_device_daily where date='" +
        loadTime + "' and is_pay>0  group by device_id,device_type,platform,country,date"
      val tagRows = spark.sql(purchaseSql).rdd.map { row => operLineRdd(row) }
      val tagDf = spark.createDataFrame(tagRows, ResultSchema)

      tagDf
        .sortWithinPartitions(PartitionColumns.map(name => new Column(name)): _*)
        .write
        .mode(SaveMode.Overwrite)
        .format("parquet")
        .partitionBy(PartitionColumns: _*)
        .save(dayOutputPath)

      // Register every tag_type directory that was just written as a table partition.
      MobvistaSparkHadoopUtil.sparkHadoopUtil
        .globPath(new Path(dayOutputPath + "/tag_type=*"))
        .foreach { partitionDir =>
          val spec = parsePartitionSpec(partitionDir.toString.replace(outputPath, ""))
          // Only issue the statement when all three partition keys were found.
          for {
            day <- spec.get("day")
            tagSource <- spec.get("tag_source")
            tagType <- spec.get("tag_type")
          } spark.sql(
            s"alter table ${dbName}.${tableName} add IF NOT EXISTS " +
              s"partition(day='${day}',tag_source='${tagSource}',tag_type='${tagType}')")
        }
    } finally {
      // Release the cluster resources even when the job fails half way.
      spark.stop()
    }
  }

  /**
   * Extracts the `key=value` segments of a partition directory path.
   *
   * Segments without `=` or with an empty value are ignored; when a key occurs
   * more than once the last occurrence wins.
   *
   * @param relativeDir path such as `/day=20171218/tag_source=ga/tag_type=purchase`
   * @return map from partition key to its non-empty value
   */
  private def parsePartitionSpec(relativeDir: String): Map[String, String] =
    relativeDir.split("/")
      .map(_.split("=", 2))
      .collect { case Array(key, value) if value.nonEmpty => key -> value }
      .toMap

  /**
   * Converts one aggregated input row into a purchase tag row.
   *
   * @param row input row with columns, in order: device_id, device_type,
   *            platform, country, date (all read as strings) and the summed
   *            `is_pay` (read as a long)
   * @return row laid out as device_id, country, device_type, platform,
   *         tag_type, tag_name, tag_value, package_name, event_day, day;
   *         `tag_value` is 1 when the summed `is_pay` is positive, else 0, and
   *         both `event_day` and `day` carry the input date
   */
  def operLineRdd(row: Row): Row = {
    val deviceId = row.getString(0)
    val deviceType = row.getString(1)
    val platform = row.getString(2)
    val country = row.getString(3)
    val eventDay = row.getString(4)
    val tagValue = if (row.getLong(5) > 0) 1 else 0
    // GA rows carry no package name, so the column is written as an empty string.
    val packageName = ""
    Row(
      deviceId,
      country,
      deviceType,
      platform,
      TagType,
      TagName,
      tagValue,
      packageName,
      eventDay,
      eventDay)
  }

}