package mobvista.dmp.datasource.event_tag

import java.util.Date

import com.alibaba.fastjson.JSON
import org.apache.spark.sql.types._
import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.Options
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.commons.lang.StringUtils
import java.net.URI

/**
  * Spark job that parses tab-separated event log lines, joins them with the
  * campaign list, package tag and advertiser event-name tables, and writes the
  * tagged events as zlib-compressed ORC.
  *
  * Command line options (see [[buildOptions]]): `input`, `output`, `today`, `coalesce`.
  */
class Dmp3sEventTag extends CommonSparkJob with Serializable {

  import com.alibaba.fastjson.JSONObject
  import scala.util.Try
  import scala.util.control.NonFatal

  /** Device-id keys probed, in priority order, in the top-level `query` object. */
  private val PrimaryDeviceIdKeys: List[String] =
    List("mobvista_gaid", "mobvista_idfa", "mobvista_adid", "mobvista_devid", "mobvista_imei")

  /** Device-id keys probed, in priority order, in `match_result.data.query`. */
  private val FallbackDeviceIdKeys: List[String] =
    List("mb_ifa", "ios_idfa", "mb_idfa", "mb_gid", "mb_gaid", "ios_ifa", "ifa", "idfa", "mb_adid", "mb_devid")

  /**
    * Parses the command line and runs the job.
    *
    * @param args raw command line arguments
    * @return 1 when a mandatory option is missing, 0 after a successful run
    * @throws NumberFormatException    if `coalesce` is not an integer
    * @throws IllegalArgumentException if `coalesce` is not positive
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      1
    } else {
      printOptions(commandLine)

      // Validate the partition count before any side effect (session start, output delete).
      val partitions = commandLine.getOptionValue("coalesce").toInt
      require(partitions > 0, s"coalesce must be a positive integer, got $partitions")

      process(
        commandLine.getOptionValue("input"),
        commandLine.getOptionValue("today"),
        partitions,
        commandLine.getOptionValue("output"))
      0
    }
  }

  /**
    * Builds the Spark session, tags the events and writes the result.
    *
    * @param inputPath  location of the raw tab-separated log lines
    * @param today      value stored in the `update_date` column of every row
    * @param partitions number of output partitions
    * @param output     ORC output location; any existing content is removed first
    */
  private def process(inputPath: String, today: String, partitions: Int, output: String): Unit = {
    val spark = SparkSession
      .builder()
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.io.compression.lz4.blockSize", "64k")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.autoBroadcastJoinThreshold", "209715200")
      .config("spark.sql.broadcastTimeout", "1200")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()

    try {
      import spark.implicits._

      // Resolve the file system from the output path itself so that the delete
      // targets the same location the ORC writer will use, whatever the bucket.
      val outputPath = new Path(output)
      outputPath.getFileSystem(spark.sparkContext.hadoopConfiguration).delete(outputPath, true)

      // Lines with fewer than three tab-separated columns cannot be parsed by parseRow.
      val parsedRows = spark.sparkContext
        .textFile(inputPath)
        .filter(line => line.split("\t").length > 2)
        .map(line => parseRow(line, today))

      // Column order must match the Row built in parseRow.
      val schema = StructType(Seq(
        StructField("device_id", StringType),
        StructField("device_type", StringType),
        StructField("platform", StringType),
        StructField("uuid", StringType),
        StructField("country", StringType),
        StructField("event_name", StringType),
        StructField("event_value", StringType),
        StructField("event_time", LongType),
        StructField("install_time", StringType),
        StructField("update_date", StringType)))

      spark.createDataFrame(parsedRows, schema)
        .na.fill(Map("device_type" -> "-", "platform" -> "-", "event_name" -> "-"))
        .createOrReplaceTempView("etl_3s_event_org_tab")

      val taggedEventsSql =
        """
          |select
          |'' type,
          |t1.uuid,
          |t1.device_id,
          |t1.device_type,
          |t1.platform,
          |t2.package_name,
          |t1.country,
          |t1.event_name,
          |t1.event_value,
          |t1.event_time,
          |t2.first_tag,
          |t2.second_tag,
          |case when d.event_name is null then '0'  else '1' end as pursub,
          |t1.install_time,
          |t1.update_date
          |from etl_3s_event_org_tab t1 join (select t0.id,t0.package_name,t0.uuid,t.first_tag,t.second_tag from (select id,uuid,case when instr(package_name,'id') = 1 then substr(package_name,3) else package_name end as package_name from dwh.ods_adn_campaign_list where dt like '2%') t0 join  dwh.dim_package_tags_combine t on(t0.package_name = t.package_name) group by  t0.id,t0.package_name,t0.uuid,t.first_tag,t.second_tag ) t2
          |on (t1.uuid = t2.uuid)
          |LEFT  JOIN dwh.adv_event_name_tab d on (lower(t1.event_name) = lower(d.event_name))
          |group by
          |t1.uuid,
          |t1.device_id,
          |t1.device_type,
          |t1.platform,
          |t2.package_name,
          |t1.country,
          |t1.event_name,
          |t1.event_value,
          |t1.event_time,
          |t2.first_tag,
          |t2.second_tag,
          |case when d.event_name is null then '0'  else '1' end,
          |t1.install_time,
          |t1.update_date
        """.stripMargin

      // An af_purchase event flagged by the join is un-flagged when its numeric
      // value is below this threshold.
      val minPurchaseValue = 0.9

      spark.sql(taggedEventsSql).map { row =>
        val eventName = row.getAs[String]("event_name")
        val eventValue = row.getAs[String]("event_value")
        val joinedPursub = row.getAs[String]("pursub")

        // A non-numeric event_value leaves the flag produced by the join untouched.
        val belowThreshold =
          StringUtils.isNotBlank(eventValue) &&
            "af_purchase".equalsIgnoreCase(eventName) &&
            "1".equalsIgnoreCase(joinedPursub) &&
            Try(eventValue.toDouble).toOption.exists(_ < minPurchaseValue)
        val pursub = if (belowThreshold) "0" else joinedPursub

        (row.getAs[String]("type"),
          row.getAs[String]("uuid"),
          row.getAs[String]("device_id"),
          row.getAs[String]("device_type"),
          row.getAs[String]("platform"),
          row.getAs[String]("package_name"),
          row.getAs[String]("country"),
          eventName,
          eventValue,
          row.getAs[Long]("event_time").toString,
          row.getAs[String]("first_tag"),
          row.getAs[String]("second_tag"),
          pursub,
          row.getAs[String]("install_time"),
          row.getAs[String]("update_date"))
      }.toDF("type", "uuid", "device_id", "device_type", "platform", "package_name", "country", "event_name", "event_value", "event_time", "first_tag", "second_tag", "pursub", "install_time", "update_date")
        .repartition(partitions)
        .write
        .mode(SaveMode.Overwrite)
        .option("orc.compress", "zlib")
        .orc(output)
    } finally {
      spark.stop()
    }
  }

  /**
    * Sets gzip / block-compression output properties on the given Hadoop configuration.
    *
    * @param conf configuration to mutate
    * @return the same `conf` instance
    */
  def initConfig(conf: Configuration): Configuration = {
    import org.apache.hadoop.io.SequenceFile
    conf.set("mapreduce.output.compress", "true")
    conf.set("mapreduce.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec")
    conf.setBoolean("mapreduce.output.fileoutputformat.compress", true)
    conf.set("mapreduce.output.fileoutputformat.compress.type", SequenceFile.CompressionType.BLOCK.toString)
    conf
  }

  /** Declares the command line options understood by this job. */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("today", true, "[must] today")
    options.addOption("coalesce", true, "[must] coalesce")
    options.addOption("input", true, "[must] input")
    options.addOption("output", true, "[must] output path")
    options
  }

  /**
    * Converts one raw log line into a [[Row]] matching the schema used in `run`.
    *
    * The line is split on tabs: the first space-separated token of column 1 is
    * the uuid and column 2 is a JSON payload. Parsing is best effort: any failure
    * while reading the payload keeps the values extracted so far, resets
    * `country` to "-" and leaves the remaining fields at their defaults.
    *
    * @param log     raw log line; must contain at least three tab-separated columns
    * @param log_day value stored in the `update_date` column
    * @return row of (device_id, device_type, platform, uuid, country, event_name,
    *         event_value, event_time, install_time, update_date)
    */
  def parseRow(log: String, log_day: String): Row = {
    val columns = log.split("\t")
    val uuid = columns(1).split(" ")(0)

    var event_name = "-"
    var event_time = 0L
    var country = "-"
    var device_id = "-"
    var event_value = "-"
    var install_time = "-"
    var platform = "-"
    // Never populated from the payload; always emitted as "-".
    val device_type = "-"

    try {
      // Parsed inside the try so that a malformed payload yields a default row
      // instead of failing the whole job.
      val json_data = JSON.parseObject(columns(2))
      val query_json = json_data.getJSONObject("query")
      if (query_json != null && query_json.size() > 0) {
        event_name = query_json.getString("event_name")
        event_value = query_json.getString("event_value")
        // May throw for a missing or non-numeric value; the catch below then
        // leaves the fields that follow at their defaults.
        event_time = query_json.getLong("event_time")
        install_time = query_json.getString("install_time")
        platform = query_json.getString("mobvista_pl")

        val queryCountry = query_json.getString("mobvista_country")
        val resolvedCountry =
          if (queryCountry == null || queryCountry.isEmpty) countryFromIp(json_data) else queryCountry
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
        country = resolvedCountry.toUpperCase(java.util.Locale.ROOT)

        // A failed fallback lookup only means "no device id"; it must not
        // discard the country resolved above.
        device_id = firstValidDeviceId(query_json, PrimaryDeviceIdKeys)
          .orElse(deviceIdFromMatchResult(json_data))
          .getOrElse("-")
      }
    } catch {
      case NonFatal(_) => country = "-"
    }

    // Field order must match the schema declared in the job.
    Row(
      device_id,
      device_type,
      platform,
      uuid,
      country,
      event_name,
      event_value,
      event_time,
      install_time,
      log_day)
  }

  /** Reads `ip_detail."edge-two-letter-country"`, or "-" when it is absent, empty or unreadable. */
  private def countryFromIp(payload: JSONObject): String =
    Try {
      for {
        ipDetail <- Option(payload.getJSONObject("ip_detail"))
        code <- Option(ipDetail.getString("edge-two-letter-country"))
        if code.nonEmpty
      } yield code
    }.toOption.flatten.getOrElse("-")

  /** Looks up a device id in `match_result.data.query`; None when the path is absent or unreadable. */
  private def deviceIdFromMatchResult(payload: JSONObject): Option[String] =
    Try {
      for {
        matchResult <- Option(payload.getJSONObject("match_result"))
        data <- Option(matchResult.getJSONObject("data"))
        matchedQuery <- Option(data.getJSONObject("query"))
        id <- firstValidDeviceId(matchedQuery, FallbackDeviceIdKeys)
      } yield id
    }.toOption.flatten

  /** Returns the value of the first key, in the given order, that holds a plausible device id. */
  private def firstValidDeviceId(json: JSONObject, keys: Seq[String]): Option[String] =
    keys.iterator.map(key => json.getString(key)).find(looksLikeDeviceId)

  /** A candidate is accepted when it is non-empty and splits into more than three "-"-separated parts. */
  private def looksLikeDeviceId(candidate: String): Boolean =
    candidate != null && candidate.nonEmpty && candidate.split("-").length > 3
}


/** Command line launcher for the [[Dmp3sEventTag]] job. */
object Dmp3sEventTag {

  /**
    * Instantiates the job and runs it with the given arguments.
    * The status code returned by `run` is not propagated.
    *
    * @param args raw command line arguments forwarded to the job
    */
  def main(args: Array[String]): Unit = {
    val job = new Dmp3sEventTag
    job.run(args)
  }
}