package mobvista.dmp.datasource.apptag

import java.net.URI

import com.alibaba.fastjson.JSON
import mobvista.dmp.common.CommonSparkJob
import mobvista.dmp.util.MRUtils
import org.apache.commons.cli.{BasicParser, Options}
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

/**
  * @package: mobvista.dmp.datasource.apptag
  * @author: wangjf
  * @date: 2019/07/19
  * @time: 11:03 AM
  * @email: jinfeng.wang@mobvista.com
  * @phone: 152-1062-7698
  */
class AppTagJob extends CommonSparkJob with java.io.Serializable {

  /**
    * Builds the command-line options understood by this job.
    *
    * @return an [[Options]] instance holding the single `output` option, which takes a value
    */
  def commandOptions(): Options = {
    val options = new Options()
    options.addOption("output", true, "output")
    options
  }

  /**
    * Exports app tags in two layouts under the `output` directory.
    *
    * Steps, all visible in this method:
    *  1. Run `Constant.ods2new_sql` and build a lookup from `new_second_id` and `new_first_id`
    *     to the triple (`tag_type`, `first_tag`, `second_tag`).
    *  2. Read `app_tag.tag_list` over JDBC and keep the package names whose `install` equals
    *     `"10000000000"` and whose `tag` parses to `1`.
    *  3. Read `app_tag.tag_result`, keep the rows of those packages that carry a non-empty
    *     `feat_id` JSON object, translate every key through the lookup (unknown keys are kept
    *     verbatim), join the pieces with `#`, and write
    *     (package_name, lower-cased platform, tags) lines to `output/packageName`.
    *  4. Read `mob_adn.publisher_channel`, derive a package name from `confirmed_url`, and write
    *     (channel id, platform, tags) lines to `output/appId` for every package that has one.
    *
    * Existing `output/packageName` and `output/appId` directories are deleted before writing.
    *
    * @param args raw command-line arguments; `-output <path>` is required
    * @return always `0` when the job finishes without throwing
    * @throws IllegalArgumentException if `-output` is missing or blank
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = new BasicParser().parse(commandOptions(), args)
    val output = commandLine.getOptionValue("output")
    // Fail fast: without this check a missing option would be written to the literal path "null/...".
    require(StringUtils.isNotBlank(output), "missing required option: -output <path>")

    // SECURITY(review): these credentials are hard-coded in source control. They are kept
    // byte-identical here so the job keeps working, but they should be moved to a secured
    // configuration source and rotated.
    val appTagUrl = "jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306"
    val appTagUser = "apptag_rw"
    val appTagPassword = "7gyLEVtkER3u8c9"

    val spark = SparkSession
      .builder()
      .appName("AppTagJob")
      .config("spark.rdd.compress", "true")
      .config("spark.shuffle.compress", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.io.compression.lz4.blockSize", "64k")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()

    val sc = spark.sparkContext
    try {
      // Resolve the file system from the output location itself instead of a fixed bucket, so
      // the delete below always targets the same store that saveAsTextFile writes to.
      val outputUri: URI = new Path(output).toUri
      val fs = FileSystem.get(outputUri, sc.hadoopConfiguration)

      // An earlier revision read this mapping from a text file on S3; it now comes from the
      // query in Constant.ods2new_sql. The query is run once and both id columns are keyed from
      // the same result. Second-level ids are inserted first so that, on a key collision, the
      // first-level entry wins, as it did when the two halves were unioned.
      // getAs is called with an explicit type argument throughout; without one the type
      // parameter has nothing to be inferred from.
      val tagById = sc.broadcast {
        val rows = spark.sql(Constant.ods2new_sql).collect().map { r =>
          val tag = (r.getAs[Any]("tag_type").toString, r.getAs[Any]("first_tag").toString,
            r.getAs[Any]("second_tag").toString)
          (r.getAs[Any]("new_second_id").toString, r.getAs[Any]("new_first_id").toString, tag)
        }
        (rows.map(t => t._1 -> t._3) ++ rows.map(t => t._2 -> t._3)).toMap
      }

      // Package names selected from tag_list. NOTE(review): the meaning of the install value
      // "10000000000" and of tag == 1 is not visible in this file - confirm with the table owner.
      val taggedPackages = Constant.jdbcConnection(spark, "app_tag", "tag_list",
        appTagUrl, appTagUser, appTagPassword).rdd
        .filter { r =>
          r.getAs[Any]("install").toString.equals("10000000000") &&
            Integer.parseInt(r.getAs[Any]("tag").toString) == 1
        }
        .map(r => r.getAs[Any]("package_name").toString)
        .collect()
        .toSet

      val packageTags = Constant.jdbcConnection(spark, "app_tag", "tag_result",
        appTagUrl, appTagUser, appTagPassword).rdd
        .filter(r => taggedPackages.contains(r.getAs[Any]("package_name").toString))
        .flatMap { r =>
          // Parse feat_id once per row. Option(...) also skips rows for which the parser hands
          // back null instead of an object, rather than failing on them.
          Option(JSON.parseObject(r.getAs[Any]("feat_id").toString))
            .map(_.keySet().toArray(new Array[String](0)))
            .filter(_.nonEmpty)
            .map { featIds =>
              val tags = featIds.map { featId =>
                tagById.value.get(featId).fold(featId) {
                  case (tagType, firstTag, secondTag) => s"$tagType,$firstTag,$secondTag"
                }
              }.mkString("#")
              MRUtils.JOINER.join(r.getAs[Any]("package_name").toString,
                r.getAs[Any]("platform").toString.toLowerCase, tags)
            }
        }.cache

      fs.delete(new Path(output + "/packageName"), true)
      packageTags.saveAsTextFile(output + "/packageName")

      // Lower-cased package name -> publisher channel id.
      // NOTE(review): platform "2" presumably means iOS (last URL segment "id<digits>") and
      // "1" Android (query parameter "id=<package>") - inferred from the URL shapes only.
      val appIdByPackage = sc.broadcast(Constant.jdbcConnection(spark, "mob_adn", "publisher_channel",
        "jdbc:mysql://adn-datamining-mysql.mobvista.com:3306", "mob_adn_ro", "blueriver123")
        .rdd
        .flatMap { r =>
          val platform = r.getAs[Any]("platform").toString
          // split drops trailing empty strings, so a URL such as "/" has no segments at all;
          // lastOption keeps such rows from throwing.
          val segment = r.getAs[Any]("confirmed_url").toString.split("\\/").lastOption.getOrElse("")
          val packageName =
            if (platform.equals("2") && segment.startsWith("id")) {
              // Keep the part before the query string: "id123?mt=8" must give "123", not "mt=8".
              Some(segment.takeWhile(_ != '?').stripPrefix("id"))
            } else if (platform.equals("1") && segment.contains("id=") && segment.contains(".")) {
              // Read the value of the "id" parameter wherever it sits in the query, so
              // "details?id=com.foo&hl=en" gives "com.foo" rather than "en".
              val query = segment.substring(segment.indexOf('?') + 1)
              val exact = query.split("&").find(_.startsWith("id=")).map(_.stripPrefix("id="))
              // Fall back to the first "id=" occurrence for segments without a clean parameter.
              Some(exact.getOrElse(
                segment.substring(segment.indexOf("id=") + 3).takeWhile(_ != '&')))
            } else {
              None
            }
          packageName
            .filter(_.nonEmpty)
            .map(name => (name.toLowerCase, r.getAs[Any]("id").toString))
        }
        .collectAsMap())

      fs.delete(new Path(output + "/appId"), true)
      packageTags.flatMap { line =>
        val arr = MRUtils.SPLITTER.split(line)
        appIdByPackage.value.get(arr(0).toLowerCase)
          .filter(appId => StringUtils.isNotBlank(appId) && StringUtils.isNotBlank(arr(2)))
          .map(appId => MRUtils.JOINER.join(appId, arr(1), arr(2)))
      }.saveAsTextFile(output + "/appId")

    } finally {
      spark.stop()
    }
    0
  }
}

object AppTagJob {
  /**
    * Command-line entry point: creates an [[AppTagJob]] and runs it with the raw arguments.
    * The status code returned by `run` is discarded.
    *
    * @param args command-line arguments forwarded unchanged to the job
    */
  def main(args: Array[String]): Unit = {
    val job = new AppTagJob()
    job.run(args)
  }
}

