package mobvista.dmp.datasource.app_info_tag

import com.alibaba.fastjson.{JSONArray, JSONObject}
import org.apache.spark.sql.functions.concat_ws
import org.apache.spark.sql.types._
import org.apache.spark.sql.{SparkSession, _}

object Merge_app_info_adr {

  /**
   * Manual smoke check for [[mergeMax]].
   *
   * Runs three hard-coded sample pairs (a rating count, a download bucket and
   * an update date) through `mergeMax` and prints each result on its own line.
   *
   * @param args ignored
   */
  def main2(args: Array[String]): Unit = {
    val samples = Seq(
      ("47", "-1"),
      ("-1", "100,000 - 500,000"),
      ("20180302", "19970101")
    )

    // All pairs are merged before anything is printed.
    val merged = samples.map { case (left, right) => mergeMax(left, right) }
    merged.foreach(line => println(line))
  }

  /**
   * Spark entry point: merges Android app-info records per package and writes
   * them as tab-separated text.
   *
   * Steps:
   *  1. Read the ORC data set at `spark.app.input_path`.
   *  2. Normalise every row into a `(package_name, JSONObject)` pair
   *     (see `toKeyedRecord`).
   *  3. Collapse rows that share a package name (see `mergeRecords`).
   *  4. Replace empty fields with default markers (see `toOutputRow`).
   *  5. Drop rows whose package name is empty/null or whose category list is
   *     `[]`, then write one text file to `spark.app.output_path`
   *     (overwriting existing output).
   *
   * Failures are no longer swallowed: any exception propagates to the caller
   * after the SparkContext has been stopped, so a failed run is visible to
   * whatever launched the job.
   *
   * @param args unused; input and output locations come from the Spark conf
   *             keys `spark.app.input_path` and `spark.app.output_path`
   */
  def main(args: Array[String]): Unit = {

    val spark = SparkSession
      .builder()
      .appName("merge_app_info_adr")
      .config("spark.rdd.compress", "true")
      .config("spark.shuffle.compress", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.io.compression.lz4.blockSize", "64k")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.kryoserializer.buffer.max", "300m")
      .config("spark.kryoserializer.buffer.mb", "128")
      .config("spark.hadoop.mapreduce.input.fileinputformat.split.minsize", "10649600")
      .config("spark.hadoop.mapreduce.input.fileinputformat.split.maxsize", "3145721600")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // Read inside the try so a missing setting still stops the context.
      val output_path = spark.conf.get("spark.app.output_path")
      val input_path = spark.conf.get("spark.app.input_path")

      // NOTE(review): "header" and "inferSchema" look like CSV reader options;
      // they are kept unchanged here - confirm whether the ORC reader needs them.
      val source_df = spark.read.format("orc").option("header", true).option("inferSchema", true).load(input_path)

      val keyed_rdd = source_df.rdd.mapPartitions(iter => iter.map(row => toKeyedRecord(row)))

      val result_rdd = keyed_rdd
        .reduceByKey((json_1, json_2) => mergeRecords(json_1, json_2))
        .map { case (package_name, data_json) => toOutputRow(package_name, data_json) }

      // Output columns, in the order they appear in every written line.
      val output_columns = Seq(
        "package_name",
        "app_name",
        "category_list",
        "user_rating_count",
        "download",
        "track_view_url",
        "update_time",
        "artist_name",
        "content_advisory_rating"
      )
      val schema = StructType(output_columns.map(name => StructField(name, StringType, true)))

      val df = spark.createDataFrame(result_rdd, schema).filter("package_name!='' and package_name is not null and (category_list!='[]')")

      df.select(concat_ws("\t", output_columns.map(name => df.col(name)): _*))
        .coalesce(1).write.format("text").mode("overwrite").save(output_path)
    } finally {
      spark.sparkContext.stop()
    }
  }

  /**
   * Normalises one input row into a `(package_name, record)` pair.
   *
   * Columns are read by position: 0 package_name, 1 app_name, 2 category_list,
   * 3 user_rating_count, 4 download, 5 track_view_url, 6 update_time,
   * 7 artist_name, 8 content_advisory_rating. Columns 3-8 are read with
   * `getString`, so they must be string-typed in the input.
   *
   * NOTE(review): `String.valueOf` turns a null column into the literal
   * "null". app_name is cleaned of that literal, but package_name keeps it and
   * category_list becomes `["null"]` - confirm this is intended.
   */
  private def toKeyedRecord(p: Row): (String, JSONObject) = {
    // Empty/null -> "[]"; a bare value -> one-element list; lists pass through.
    def wrapAsList(s: String): String =
      if (s == null || s.isEmpty()) "[]"
      else if (!s.startsWith("[")) "[\"" + s + "\"]"
      else s

    def blankIfNull(s: String): String = if (s == null) "" else s

    // Treats null, "" and the literal "null" as "no value".
    def blankIfNullLiteral(s: String): String =
      if (s == null || s.isEmpty() || s.equals("null")) "" else s

    val package_name = String.valueOf(p(0))
    val app_name = blankIfNullLiteral(String.valueOf(p(1)))
    val category_list = wrapAsList(String.valueOf(p(2)))
    val user_rating_count = blankIfNullLiteral(p.getString(3))
    val download = p.getString(4) // stored as-is, may be null
    val track_view_url = wrapAsList(p.getString(5))
    val update_time = blankIfNull(p.getString(6))
    val artist_name = blankIfNull(p.getString(7))
    val content_advisory_rating = blankIfNull(p.getString(8))

    val value = new JSONObject()
    value.put("package_name", package_name)
    value.put("app_name", app_name)
    value.put("category_list", category_list)
    value.put("user_rating_count", user_rating_count)
    value.put("download", download)
    value.put("track_view_url", track_view_url)
    value.put("update_time", update_time)
    value.put("artist_name", artist_name)
    value.put("content_advisory_rating", content_advisory_rating)

    (package_name, value)
  }

  /**
   * Combines two records of the same package into a new record.
   *
   * category_list and track_view_url are unioned (duplicates removed) and
   * re-rendered as `[a,b,...]`; app_name is chosen by `mergeApp_name`;
   * user_rating_count, download and update_time take the value selected by
   * `mergeMax`; artist_name and content_advisory_rating take the value
   * selected by `mergeString`. The result carries no "package_name" entry
   * because the package name is the key of the surrounding pair.
   */
  private def mergeRecords(json_1: JSONObject, json_2: JSONObject): JSONObject = {
    val category_set = scala.collection.mutable.Set[String]()
    mergeJSONArray(json_1.getJSONArray("category_list"), category_set)
    mergeJSONArray(json_2.getJSONArray("category_list"), category_set)
    val category_list_merge_str = "[" + category_set.mkString(",") + "]"

    val app_name_merge_str = mergeApp_name(json_1, json_2)

    val url_set = scala.collection.mutable.Set[String]()
    mergeArrayStr(json_1.getString("track_view_url"), url_set)
    mergeArrayStr(json_2.getString("track_view_url"), url_set)
    val merge_track_view_url_str = "[" + url_set.mkString(",") + "]"

    val user_rating_count_merge = mergeMax(json_1.getString("user_rating_count"), json_2.getString("user_rating_count"))
    val download_merge = mergeMax(json_1.getString("download"), json_2.getString("download"))
    val update_time = mergeMax(json_1.getString("update_time"), json_2.getString("update_time"))
    val artist_name = mergeString(json_1.getString("artist_name"), json_2.getString("artist_name"))
    val content_advisory_rating = mergeString(json_1.getString("content_advisory_rating"), json_2.getString("content_advisory_rating"))

    val value = new JSONObject()
    value.put("app_name", app_name_merge_str)
    value.put("category_list", category_list_merge_str)
    value.put("user_rating_count", user_rating_count_merge)
    value.put("download", download_merge)
    value.put("track_view_url", merge_track_view_url_str)
    value.put("update_time", update_time)
    value.put("artist_name", artist_name)
    value.put("content_advisory_rating", content_advisory_rating)
    value
  }

  /**
   * Turns a merged record into the output row, substituting default markers
   * for null or empty fields: "none" for app_name, download, artist_name and
   * content_advisory_rating, "-1" for user_rating_count and "19970101" for
   * update_time. category_list and track_view_url are passed through.
   */
  private def toOutputRow(package_name: String, data_json: JSONObject): Row = {
    def orDefault(s: String, fallback: String): String =
      if (s == null || s.isEmpty()) fallback else s

    Row(
      package_name,
      orDefault(data_json.getString("app_name"), "none"),
      data_json.getString("category_list"),
      orDefault(data_json.getString("user_rating_count"), "-1"),
      orDefault(data_json.getString("download"), "none"),
      data_json.getString("track_view_url"),
      orDefault(data_json.getString("update_time"), "19970101"),
      orDefault(data_json.getString("artist_name"), "none"),
      orDefault(data_json.getString("content_advisory_rating"), "none")
    )
  }

  /**
   * Adds the JSON text of every object element of `array` to `merge_array`.
   *
   * The set de-duplicates on the full serialized object, so two elements are
   * considered the same only when their JSON text is identical.
   *
   * A null `array` and null elements are skipped. Elements are no longer
   * required to contain "code" and "name" keys: those values were looked up
   * but never used, and the lookup failed with a NullPointerException when
   * either key was absent.
   *
   * @param array       JSON array of objects to merge; may be null
   * @param merge_array accumulator that receives the serialized objects
   */
  def mergeJSONArray(array: JSONArray, merge_array: scala.collection.mutable.Set[String]): Unit = {
    if (array != null) {
      for (a <- 0 until array.size()) {
        val obj = array.getJSONObject(a)
        if (obj != null) {
          merge_array.add(obj.toString())
        }
      }
    }
  }

  /**
   * Splits a bracketed, comma-separated list such as `["a","b"]` and adds each
   * trimmed, non-empty item to `merge_array`.
   *
   * The text between the outer brackets is split on every comma; items are not
   * unquoted or otherwise parsed, so an item that itself contains a comma is
   * split into several entries.
   *
   * Input that is null, or that does not both start with `[` and end with `]`,
   * is ignored. Items are trimmed before the emptiness check, so
   * whitespace-only items are dropped instead of being added as "".
   *
   * @param array_str   textual list to split; may be null
   * @param merge_array accumulator that receives the items
   */
  def mergeArrayStr(array_str: String, merge_array: scala.collection.mutable.Set[String]): Unit = {
    if (array_str != null && array_str.startsWith("[") && array_str.endsWith("]")) {
      array_str
        .substring(1, array_str.length() - 1)
        .split(",")
        .map(item => item.trim())
        .filter(item => item.nonEmpty)
        .foreach(item => merge_array.add(item))
    }
  }

  /**
   * Adds two counts given as decimal strings and returns the sum as a string.
   *
   * A null or empty argument contributes nothing; when both are absent the
   * result is "0". Any other text must be a valid `Long`, otherwise
   * `NumberFormatException` is thrown.
   *
   * @param count1_str first count, may be null or empty
   * @param count2_str second count, may be null or empty
   * @return the decimal text of the sum
   */
  def mergeCount(count1_str: String, count2_str: String): String = {
    val total = Seq(count1_str, count2_str)
      .filter(text => text != null && text.nonEmpty)
      .map(text => text.toLong)
      .sum
    total.toString
  }

  /**
   * Returns the larger of two string values, ignoring absent ones.
   *
   * A value is absent when it is null, empty or the literal "null". If both
   * are absent the result is ""; if only one is present it is returned.
   *
   * When both values parse as `Long` they are compared numerically, so "10"
   * beats "9" (a plain string comparison would pick "9"). In every other case,
   * and when the two numbers are equal, the values are compared
   * lexicographically. For equal-width digit strings such as `yyyyMMdd` dates
   * both orders agree.
   *
   * @param time1_str first candidate, may be null
   * @param time2_str second candidate, may be null
   * @return the larger present value, or "" when neither is present
   */
  def mergeMax(time1_str: String, time2_str: String): String = {
    def present(s: String): Boolean = s != null && s.nonEmpty && !s.equals("null")
    def asLong(s: String): Option[Long] = scala.util.Try(s.toLong).toOption

    val first = if (present(time1_str)) time1_str else ""
    if (!present(time2_str)) {
      first
    } else if (first.isEmpty) {
      time2_str
    } else {
      val numeric_order = for {
        a <- asLong(first)
        b <- asLong(time2_str)
      } yield java.lang.Long.compare(b, a)
      // Fall back to text order for non-numeric input and for numeric ties.
      val order = numeric_order.filter(_ != 0).getOrElse(time2_str.compareTo(first))
      if (order > 0) time2_str else first
    }
  }

  /**
   * Chooses the app name to keep when two records are merged.
   *
   * The record with the strictly greater "update_time" (string comparison; a
   * null or empty time counts as "0") is preferred. On a tie `json_1` is
   * preferred. If the preferred record has no non-empty "app_name", the other
   * record's name is used; if neither has one the result is "".
   *
   * @param json_1 first record
   * @param json_2 second record
   * @return the selected app name, possibly ""
   */
  def mergeApp_name(json_1: JSONObject, json_2: JSONObject): String = {
    def timeOf(json: JSONObject): String = {
      val raw = json.getString("update_time")
      if (raw == null || raw.isEmpty()) "0" else raw
    }

    def nameOf(json: JSONObject): Option[String] =
      Option(json.getString("app_name")).filter(name => name.nonEmpty)

    val time_1 = timeOf(json_1)
    val time_2 = timeOf(json_2)

    val (preferred, fallback) =
      if (time_2.compareTo(time_1) > 0) (json_2, json_1) else (json_1, json_2)

    nameOf(preferred).orElse(nameOf(fallback)).getOrElse("")
  }

  /**
   * Returns the first of the two arguments that carries a real value.
   *
   * A value is rejected when it is null, empty, or equals "null" in any
   * letter case. `str1` wins over `str2`; "" is returned when both are
   * rejected.
   *
   * @param str1 preferred value, may be null
   * @param str2 fallback value, may be null
   * @return `str1`, `str2` or ""
   */
  def mergeString(str1: String, str2: String): String =
    Seq(str1, str2)
      .find(text => text != null && text.nonEmpty && !text.toLowerCase.equals("null"))
      .getOrElse("")
}