package mobvista.dmp.datasource.event_tag

import mobvista.dmp.common.CommonSparkJob
import mobvista.prd.datasource.util.GsonUtil
import org.apache.commons.cli.Options
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

import java.net.URI
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer


/**
 * Spark job that turns one day of postback events into tagged event rows.
 *
 * Pipeline, as implemented below:
 *  1. read the `today` partition of `dwh.ods_adn_trackingnew_postback_event`
 *     (types appsflyer / min_appsflyer / tenjin / adjust) and keep rows accepted by [[filterData]];
 *  2. recover the plain device id from `dwh.device_id_md5_match` (partition `last_sunday`)
 *     when the event only carries a usable MD5;
 *  3. attach the `first_tag#second_tag` pairs from `dwh.dim_package_tags_combine`;
 *  4. set `pursub` to '1' when the event name exists in `dwh.adv_event_name_tab`,
 *     then downgrade some appsflyer events based on their JSON `event_value`;
 *  5. emit one row per (event, tag) and write the result as zlib-compressed ORC to `output`.
 */
class PostBackEvent extends CommonSparkJob with java.io.Serializable {

  /** Declares the command-line options this job requires. */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("output", true, "[must] output")
    options.addOption("coalesce", true, "[must] coalesce")
    options.addOption("today", true, "[must] today")
    options.addOption("last_sunday", true, "[must] last_sunday")
    options
  }

  /**
   * Parses the arguments and runs the job.
   *
   * @param args raw command-line arguments
   * @return -1 when a mandatory option is missing, 0 after a successful run
   * @throws NumberFormatException when `coalesce` is not an integer; this is raised before
   *                               the Spark session is created and before `output` is deleted
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)
      buildEventTags(
        output = commandLine.getOptionValue("output"),
        // Validate the partition count up front so a bad value cannot wipe the previous output.
        partitions = commandLine.getOptionValue("coalesce").toInt,
        today = commandLine.getOptionValue("today"),
        last_sunday = commandLine.getOptionValue("last_sunday"))
    }
  }

  /**
   * Executes the Spark pipeline described on the class.
   *
   * @param output      destination path of the ORC files; removed recursively before writing
   * @param partitions  number of partitions of the final dataset
   * @param today       value compared with `concat(yyyy,mm,dd)` of the postback table
   * @param last_sunday `dt` partition of `dwh.device_id_md5_match` to read
   * @return always 0; failures surface as exceptions
   */
  private def buildEventTags(output: String, partitions: Int, today: String, last_sunday: String): Int = {
    val spark = SparkSession.builder()
      .appName("PostBackEvent")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()

    import spark.implicits._

    try {
      // Inside the try so the session is stopped even when the delete fails.
      FileSystem.get(new URI(s"s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration).delete(new Path(output), true)

      // The GROUP BY over every selected column de-duplicates the day's events.
      val sql1 =
        s"""
           |select UPPER(idfa) idfa,
           |md5_idfa,
           |LOWER(gaid) gaid,
           |uuid,
           |md5_gaid,
           |lower(pl) platform,
           |app_id package_name,
           |country,
           |event_name,
           |event_value,
           |event_time,
           |install_time,
           |cast(`date` as string) update_date,
           |type
           |from dwh.ods_adn_trackingnew_postback_event where concat(yyyy,mm,dd) = '${today}'  and  type in ('appsflyer','min_appsflyer','tenjin','adjust')
           |group by
           |UPPER(idfa),
           |md5_idfa,
           |LOWER(gaid),
           |uuid,
           |md5_gaid,
           |lower(pl),
           |app_id,
           |country,
           |event_name,
           |event_value,
           |event_time,
           |install_time,
           |`date`,
           |type
           |""".stripMargin

      val df = spark.sql(sql1).filter(filterData(_)).rdd.map(line => {
        // filterData already rejected other platforms; ("", "") only mirrors the old default.
        val (device_id, device_id_md5) = deviceIdsFor(line).getOrElse(("", ""))
        (device_id,
          device_id_md5,
          line.getAs[String]("uuid"),
          line.getAs[String]("platform"),
          line.getAs[String]("package_name"),
          line.getAs[String]("country"),
          line.getAs[String]("event_name"),
          line.getAs[String]("event_value"),
          line.getAs[String]("event_time"),
          line.getAs[String]("install_time"),
          line.getAs[String]("update_date"),
          line.getAs[String]("type"))
      }
      ).toDF("device_id", "device_id_md5", "uuid", "platform", "package_name", "country", "event_name", "event_value", "event_time", "install_time", "update_date", "type")

      df.createOrReplaceTempView("etl_3s_postback_daily")

      val sql2 =
        s"""
           |select device_id,device_id_md5 from dwh.device_id_md5_match where dt='${last_sunday}' and device_type in ('gaid','idfa') group by device_id,device_id_md5
        """.stripMargin
      spark.sql(sql2).createOrReplaceTempView("etl_3s_postback_md5")

      // Keep the event's own device id when it is UUID-shaped, otherwise take the id matched by MD5.
      // A leading "id" is stripped from package_name.
      //coalesce(a.device_id,b.device_id) device_id,
      val sql3 =
        """
          |select a.type,
          |a.uuid,
          |case when a.device_id  rlike '^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$' then a.device_id else b.device_id end as device_id,
          |case when a.platform = 'ios' then  'idfa'  when a.platform = 'android' then 'gaid' end as device_type,
          |a.platform,
          |case when instr(a.package_name,'id') = 1 then substr(a.package_name,3) else a.package_name end as package_name,
          |a.country,
          |a.event_name,
          |a.event_value,
          |a.event_time,
          |a.install_time,
          |a.update_date
          |from etl_3s_postback_daily a left join etl_3s_postback_md5 b on (a.device_id_md5 =b.device_id_md5)
        """.stripMargin
      spark.sql(sql3).repartition(2000).createOrReplaceTempView("etl_3s_pre_daily")

      // NOTE(review): t3 is grouped by (package_name, platform) but joined on package_name only,
      // so a package tagged on both platforms yields one output row per platform - confirm intent.
      //  left join dwh.adv_event_name_tab d on (lower(a.event_name) = lower(d.event_name))
      val sql4 =
        """
          |select
          |  /*+ mapjoin(d) */
          |  t.type,
          |  t.uuid,
          |  t.device_id,
          |  t.device_type,
          |  t.platform,
          |  t.package_name,
          |  t.country,
          |  t.event_name,
          |  t.event_value,
          |  t.event_time,
          |  t.tags,
          |  case when d.event_name is null then '0' else '1' end as pursub,
          |  t.install_time,
          |  t.update_date
          |from
          |(
          |  select
          |  a.type,
          |  a.uuid,
          |  a.device_id,
          |  a.device_type,
          |  a.platform,
          |  a.package_name,
          |  a.country,
          |  a.event_name,
          |  a.event_value,
          |  a.event_time,
          |  t3.tags,
          |  a.event_name,
          |  a.install_time,
          |  a.update_date
          |  from etl_3s_pre_daily a join (
          |    select package_name, platform, collect_set(concat_ws('#',first_tag,second_tag)) tags from dwh.dim_package_tags_combine group by package_name, platform
          |  ) t3 on(a.package_name = t3.package_name)
          |  group by
          |  a.type,
          |  a.uuid,
          |  a.device_id,
          |  a.device_type,
          |  a.platform,
          |  a.package_name,
          |  a.country,
          |  a.event_name,
          |  a.event_value,
          |  a.event_time,
          |  t3.tags,
          |  a.event_name,
          |  a.install_time,
          |  a.update_date
          |) t left join dwh.adv_event_name_tab d on (lower(t.event_name) = lower(d.event_name))
        """.stripMargin

      // Newly added matching rule: after the MD5 lookup only rows with a valid plain device id survive.
      val dataFrame = spark.sql(sql4).rdd.repartition(2000)
        .filter(row => isValidDeviceId(row.getAs[String]("device_id")))
        .flatMap(row => {
          val `type` = row.getAs[String]("type")
          val uuid = row.getAs[String]("uuid")
          val device_id = row.getAs[String]("device_id")
          val device_type = row.getAs[String]("device_type")
          val platform = row.getAs[String]("platform")
          val package_name = row.getAs[String]("package_name")
          val country = row.getAs[String]("country")
          val event_name = row.getAs[String]("event_name")
          val event_value = row.getAs[String]("event_value")
          val event_time = row.getAs[String]("event_time")
          val tags = row.getAs[mutable.WrappedArray[String]]("tags")
          val install_time = row.getAs[String]("install_time")
          val update_date = row.getAs[String]("update_date")
          val pursub = adjustPursub(`type`, row.getAs[String]("pursub"), event_value)

          // concat_ws drops NULL inputs, so a tag may come back without a '#'. Such entries
          // cannot be split into (first_tag, second_tag) and are skipped instead of
          // failing the task with an ArrayIndexOutOfBoundsException.
          tags.iterator.map(_.split("#", -1)).collect {
            case Array(first_tag, second_tag, _*) =>
              Row(`type`, uuid, device_id, device_type, platform, package_name, country, event_name, event_value, event_time, first_tag, second_tag, pursub, install_time, update_date)
          }
        })

      spark.createDataFrame(dataFrame, schema)
        .repartition(partitions)
        .write
        .mode(SaveMode.Overwrite)
        .option("orc.compress", "zlib")
        .option("mapreduce.fileoutputcommitter.marksuccessfuljobs", false)
        .orc(output)

    } finally {
      spark.stop()
    }
    0
  }

  /** Column layout of the ORC output; every column is a string. */
  val schema: StructType = StructType(Array(
    StructField("type", StringType),
    StructField("uuid", StringType),
    StructField("device_id", StringType),
    StructField("device_type", StringType),
    StructField("platform", StringType),
    StructField("package_name", StringType),
    StructField("country", StringType),
    StructField("event_name", StringType),
    StructField("event_value", StringType),
    StructField("event_time", StringType),
    StructField("first_tag", StringType),
    StructField("second_tag", StringType),
    StructField("pursub", StringType),
    StructField("install_time", StringType),
    StructField("update_date", StringType)))

  /**
   * Decides whether a raw postback row is usable.
   *
   * A row is kept when `event_name` and `package_name` are non-blank, the platform is
   * ios or android, and either the platform's device id passes [[isValidDeviceId]] or
   * its MD5 is non-blank and matches `imeiMd5Ptn`.
   *
   * @param line row exposing the columns selected from the postback table
   *             (idfa, md5_idfa, gaid, md5_gaid, platform, event_name, package_name)
   * @return true when the row should be processed further
   */
  def filterData(line: Row): Boolean = {
    val event_name = line.getAs[String]("event_name")
    val package_name = line.getAs[String]("package_name")

    if (StringUtils.isBlank(event_name) || StringUtils.isBlank(package_name)) {
      false
    } else {
      deviceIdsFor(line).exists { case (device_id, device_id_md5) =>
        isValidDeviceId(device_id) ||
          (StringUtils.isNotBlank(device_id_md5) && device_id_md5.matches(imeiMd5Ptn))
      }
    }
  }

  /**
   * Picks the (device id, device id MD5) pair that belongs to the row's platform:
   * idfa columns for ios, gaid columns for android, nothing for any other platform.
   */
  private def deviceIdsFor(line: Row): Option[(String, String)] = {
    val platform = line.getAs[String]("platform")
    if ("ios".equalsIgnoreCase(platform)) {
      Some((line.getAs[String]("idfa"), line.getAs[String]("md5_idfa")))
    } else if ("android".equalsIgnoreCase(platform)) {
      Some((line.getAs[String]("gaid"), line.getAs[String]("md5_gaid")))
    } else {
      None
    }
  }

  /** True when the id is non-blank, matches `didPtn` and differs from the `allZero` placeholder. */
  private def isValidDeviceId(deviceId: String): Boolean =
    StringUtils.isNotBlank(deviceId) && deviceId.matches(didPtn) && !allZero.equals(deviceId)

  /**
   * Re-evaluates the `pursub` flag of an appsflyer event from its JSON `event_value`.
   *
   * Only rows with type "appsflyer", flag "1" and an `event_value` starting with "{" are
   * inspected; every other row keeps its flag. For inspected rows the flag becomes "0" when
   * `af_revenue` is present and below 0.9, when `af_content_type` is the string
   * "advertising" (case-insensitive), or when the payload cannot be read.
   *
   * @param source     value of the `type` column
   * @param pursub     flag computed by the SQL join
   * @param eventValue raw `event_value` column, may be null
   * @return the possibly downgraded flag
   */
  private def adjustPursub(source: String, pursub: String, eventValue: String): String = {
    import scala.util.control.NonFatal

    val inspectable = "appsflyer".equalsIgnoreCase(source) && "1".equalsIgnoreCase(pursub) &&
      StringUtils.isNotBlank(eventValue) && eventValue.startsWith("{")

    if (!inspectable) {
      pursub
    } else {
      try {
        val json = GsonUtil.String2JsonObject(eventValue)

        val revenue = json.get("af_revenue")
        val lowRevenue = revenue != null && revenue.getAsDouble < 0.9

        // getAsString yields the bare text; toString would yield the JSON-quoted form
        // ("\"advertising\""), which can never equal "advertising".
        val contentType = json.get("af_content_type")
        val advertising = contentType != null && contentType.isJsonPrimitive &&
          "advertising".equalsIgnoreCase(contentType.getAsString)

        if (lowRevenue || advertising) "0" else pursub
      } catch {
        // Unreadable payloads are treated like an unreadable af_revenue: not a purchase.
        case NonFatal(_) => "0"
      }
    }
  }
}

object PostBackEvent {
  /**
   * Command-line entry point: runs the job with the given arguments.
   *
   * The status returned by `run` used to be discarded, so a failed option check (-1)
   * still ended the process with exit code 0. A non-zero status is now propagated as the
   * process exit code; a successful run returns normally.
   *
   * @param args raw command-line arguments forwarded to the job
   */
  def main(args: Array[String]): Unit = {
    val exitCode = new PostBackEvent().run(args)
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}
