package mobvista.dmp.datasource.app_info_tag

import com.alibaba.fastjson.JSONObject
import org.apache.spark.sql.functions.concat_ws
import org.apache.spark.sql.types._
import org.apache.spark.sql.{SparkSession, _}

object Merge_app_info_ios {
  /**
   * Manual debugging entry point: reads two lines from standard input, echoes
   * the first one (trimmed) between markers, merges both lines through
   * `mergeArrayLanguage` and prints the merged set as a bracketed,
   * comma-separated list.
   *
   * @param args ignored
   */
  def main2(args: Array[String]): Unit = {
    val firstInput = Console.readLine()
    val secondInput = Console.readLine()
    println("sdsd==" + firstInput.trim() + "===dsd")

    val collected = scala.collection.mutable.Set[String]()
    Seq(firstInput, secondInput).foreach(text => mergeArrayLanguage(text, collected))
    println(collected.mkString("[", ",", "]"))
  }


  /**
   * Spark entry point that merges iOS app-info records by package name.
   *
   * Reads ORC data from the path configured as `spark.app.input_path`, turns
   * every row into a normalised JSON object keyed by the first column
   * (package name), merges all objects sharing a key, fills in defaults, and
   * writes the result as tab-separated text (a single partition, overwrite
   * mode) to `spark.app.output_path`.
   *
   * Rows are kept only when the package name consists solely of ASCII digits
   * and `primary_category_id` is not the empty array `[]`.
   *
   * @param args ignored; all settings come from the Spark configuration
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("merge_app_info_ios")
      .config("spark.rdd.compress", "true")
      .config("spark.shuffle.compress", "true")
      .config("spark.io.compression.codec", "lz4")
      .config("spark.io.compression.lz4.blockSize", "64k")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.kryoserializer.buffer.max", "300m")
      .config("spark.kryoserializer.buffer.mb", "128")
      .config("spark.hadoop.mapreduce.input.fileinputformat.split.minsize", "10649600")
      .config("spark.hadoop.mapreduce.input.fileinputformat.split.maxsize", "3145721600")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // A missing setting still aborts the job with an exception, but the
      // context is now stopped on that path too (see the finally below).
      val output_path = spark.conf.get("spark.app.output_path")
      val input_path = spark.conf.get("spark.app.input_path")
      try {
        val source = spark.read.format("orc").option("header", true).option("inferSchema", true).load(input_path)

        // NOTE(review): keys that occur only once never pass through the merge
        // function, so their values keep the shape produced by the first stage.
        val result_rdd = source.rdd
          .map(row => appInfoToKeyedJson(row))
          .reduceByKey((left, right) => mergeAppInfoJson(left, right))
          .map { case (packageName, data) => appInfoToOutputRow(packageName, data) }

        // Output columns, in the order they are written.
        val columns = Seq(
          "package_name", "app_name", "file_size_bytes", "support_language", "bundle_id",
          "release_date", "primary_category", "primary_category_id", "category_list",
          "category_id_list", "user_rating_count", "track_view_url", "update_time",
          "artist_id", "artist_name", "seller_name", "is_game_center_enabled",
          "content_advisory_rating", "advisories")
        val schema = StructType(columns.map(name => StructField(name, StringType, true)))

        val df = spark.createDataFrame(result_rdd, schema)
          .filter("package_name!='' and  package_name is not null and primary_category_id!='[]'")

        df.select(concat_ws("\t", columns.map(name => df.col(name)): _*))
          .coalesce(1).write.format("text").mode("overwrite").save(output_path)
      } catch {
        case e: Exception =>
          // NOTE(review): a failed job is only printed, not rethrown, so the
          // process still ends normally. Kept as-is because callers may rely
          // on it; confirm whether the scheduler should see a failure instead.
          e.printStackTrace()
      }
    } finally {
      spark.sparkContext.stop()
    }
  }

  /** Maps null / empty (and, when requested, the literal "null") to the empty string. */
  private def appInfoScalar(raw: String, nullLiteralIsBlank: Boolean): String =
    if (raw == null || raw.isEmpty || (nullLiteralIsBlank && raw.equals("null"))) "" else raw

  /**
   * Normalises a column into bracketed array text: blank values become `[]`,
   * values already starting with `[` are kept, anything else is wrapped
   * (optionally inside double quotes).
   */
  private def appInfoArrayText(raw: String, quoteBareValue: Boolean, nullLiteralIsBlank: Boolean): String =
    if (raw == null || raw.isEmpty || (nullLiteralIsBlank && raw.equals("null"))) "[]"
    else if (raw.startsWith("[")) raw
    else if (quoteBareValue) "[\"" + raw + "\"]"
    else "[" + raw + "]"

  /** Converts one input row into a (package name, normalised JSON record) pair. */
  private def appInfoToKeyedJson(p: Row): (String, JSONObject) = {
    // Columns 0-2 are stringified whatever their type; a null yields the text "null".
    val package_name = String.valueOf(p(0))
    val app_name = appInfoScalar(String.valueOf(p(1)), nullLiteralIsBlank = true)
    val file_size_bytes = appInfoScalar(String.valueOf(p(2)), nullLiteralIsBlank = true)
    // support_language is wrapped without quotes, unlike the other array columns.
    val support_language = appInfoArrayText(p.getString(3), quoteBareValue = false, nullLiteralIsBlank = true)
    val bundle_id = appInfoArrayText(p.getString(4), quoteBareValue = true, nullLiteralIsBlank = true)
    val release_date = appInfoScalar(p.getString(5), nullLiteralIsBlank = false)
    val primary_category = appInfoArrayText(p.getString(6), quoteBareValue = true, nullLiteralIsBlank = false)
    val primary_category_id = appInfoArrayText(p.getString(7), quoteBareValue = true, nullLiteralIsBlank = false)
    val category_list = appInfoArrayText(p.getString(8), quoteBareValue = true, nullLiteralIsBlank = false)
    val category_id_list = appInfoArrayText(p.getString(9), quoteBareValue = true, nullLiteralIsBlank = false)
    val user_rating_count = appInfoScalar(p.getString(10), nullLiteralIsBlank = true)
    val track_view_url = appInfoArrayText(p.getString(11), quoteBareValue = true, nullLiteralIsBlank = false)
    val update_time = appInfoScalar(p.getString(12), nullLiteralIsBlank = false)
    val artist_id = appInfoScalar(p.getString(13), nullLiteralIsBlank = false)
    val artist_name = appInfoScalar(p.getString(14), nullLiteralIsBlank = false)
    val seller_name = appInfoScalar(p.getString(15), nullLiteralIsBlank = false)
    val is_game_center_enabled = appInfoScalar(p.getString(16), nullLiteralIsBlank = false)
    val content_advisory_rating = appInfoScalar(p.getString(17), nullLiteralIsBlank = false)
    val advisories = appInfoScalar(p.getString(18), nullLiteralIsBlank = false)

    val value = new JSONObject()
    value.put("package_name", package_name)
    value.put("app_name", app_name)
    value.put("file_size_bytes", file_size_bytes)
    value.put("support_language", support_language)
    value.put("bundle_id", bundle_id)
    value.put("release_date", release_date)
    value.put("primary_category", primary_category)
    value.put("primary_category_id", primary_category_id)
    value.put("category_list", category_list)
    value.put("category_id_list", category_id_list)
    value.put("user_rating_count", user_rating_count)
    value.put("track_view_url", track_view_url)
    value.put("update_time", update_time)
    value.put("artist_id", artist_id)
    value.put("artist_name", artist_name)
    value.put("seller_name", seller_name)
    value.put("is_game_center_enabled", is_game_center_enabled)
    value.put("content_advisory_rating", content_advisory_rating)
    value.put("advisories", advisories)
    (package_name, value)
  }

  /** Combines two JSON records of the same package into one merged record. */
  private def mergeAppInfoJson(json_1: JSONObject, json_2: JSONObject): JSONObject = {
    // Larger of the two values, as decided by mergeMax.
    def larger(field: String): String =
      mergeMax(json_1.getString(field), json_2.getString(field))

    // First usable value, as decided by mergeString.
    def firstUsable(field: String): String =
      mergeString(json_1.getString(field), json_2.getString(field))

    // De-duplicated union of both bracketed lists, rendered as "[a,b,...]".
    def union(field: String, upperCase: Boolean): String = {
      val elements = scala.collection.mutable.Set[String]()
      Seq(json_1, json_2).foreach { json =>
        if (upperCase) mergeArrayLanguage(json.getString(field), elements)
        else mergeArrayStr(json.getString(field), elements)
      }
      "[" + elements.mkString(",") + "]"
    }

    val value = new JSONObject()
    value.put("app_name", mergeApp_name(json_1, json_2))
    value.put("file_size_bytes", larger("file_size_bytes"))
    value.put("support_language", union("support_language", upperCase = true))
    value.put("bundle_id", union("bundle_id", upperCase = false))
    value.put("release_date", larger("release_date"))
    value.put("primary_category", union("primary_category", upperCase = false))
    value.put("primary_category_id", union("primary_category_id", upperCase = false))
    value.put("category_list", union("category_list", upperCase = false))
    value.put("category_id_list", union("category_id_list", upperCase = false))
    value.put("user_rating_count", larger("user_rating_count"))
    value.put("track_view_url", union("track_view_url", upperCase = false))
    value.put("update_time", larger("update_time"))
    value.put("artist_id", firstUsable("artist_id"))
    value.put("artist_name", firstUsable("artist_name"))
    value.put("seller_name", firstUsable("seller_name"))
    value.put("is_game_center_enabled", firstUsable("is_game_center_enabled"))
    value.put("content_advisory_rating", firstUsable("content_advisory_rating"))
    value.put("advisories", firstUsable("advisories"))
    value
  }

  /** Applies output defaults to a merged record and lays it out as a Row in column order. */
  private def appInfoToOutputRow(packageName: String, data: JSONObject): Row = {
    def orDefault(field: String, fallback: String): String = {
      val current = data.getString(field)
      if (current == null || current.isEmpty) fallback else current
    }

    // Previously the check was inverted: it dereferenced a null bundle_id and
    // never repaired a value that was actually missing its closing bracket.
    // A blank value now falls back to "[]" like the first stage does.
    val rawBundleId = data.getString("bundle_id")
    val bundle_id =
      if (rawBundleId == null || rawBundleId.isEmpty) "[]"
      else if (rawBundleId.endsWith("]")) rawBundleId
      else rawBundleId + "]"

    // Same acceptance rule as a full match of ^(\d+)$: one or more ASCII digits.
    // Anything else is blanked so the later filter drops the row.
    val numericPackage =
      packageName != null && packageName.nonEmpty && packageName.forall(c => c >= '0' && c <= '9')

    Row(
      if (numericPackage) packageName else "",
      orDefault("app_name", "none"),
      orDefault("file_size_bytes", "-1"),
      data.getString("support_language"),
      bundle_id,
      orDefault("release_date", "1997-01-01T00:01:05Z"),
      data.getString("primary_category"),
      data.getString("primary_category_id"),
      data.getString("category_list"),
      data.getString("category_id_list"),
      orDefault("user_rating_count", "-1"),
      data.getString("track_view_url"),
      orDefault("update_time", "19970101"),
      data.getString("artist_id"),
      data.getString("artist_name"),
      data.getString("seller_name"),
      data.getString("is_game_center_enabled"),
      data.getString("content_advisory_rating"),
      data.getString("advisories")
    )
  }

  /**
   * Adds the elements of a bracketed, comma-separated list such as `[a,b]`
   * to `merge_array`.
   *
   * The text between the outer brackets is split on every comma (quoted
   * commas are not treated specially), each piece is trimmed, and pieces that
   * are empty after trimming are skipped. Input that is null or not wrapped
   * in `[` ... `]` is ignored.
   *
   * @param array_str   bracketed list text; may be null
   * @param merge_array set that receives the elements (mutated in place)
   */
  def mergeArrayStr(array_str: String, merge_array: scala.collection.mutable.Set[String]): Unit = {
    if (array_str != null && array_str.startsWith("[") && array_str.endsWith("]")) {
      array_str
        .substring(1, array_str.length - 1)
        .split(",")
        .map(_.trim)
        // Filter after trimming so whitespace-only pieces do not end up as "".
        .filter(_.nonEmpty)
        .foreach(element => merge_array += element)
    }
  }


  /**
   * Adds the upper-cased elements of a bracketed, comma-separated list such
   * as `[en,zh]` to `merge_array`.
   *
   * The text between the outer brackets is split on every comma, each piece
   * is trimmed and upper-cased, and pieces that are empty after trimming are
   * skipped. Input that is null or not wrapped in `[` ... `]` is ignored.
   *
   * @param array_str   bracketed list text; may be null
   * @param merge_array set that receives the elements (mutated in place)
   */
  def mergeArrayLanguage(array_str: String, merge_array: scala.collection.mutable.Set[String]): Unit = {
    if (array_str != null && array_str.startsWith("[") && array_str.endsWith("]")) {
      array_str
        .substring(1, array_str.length - 1)
        .split(",")
        .map(_.trim)
        // Filter after trimming so whitespace-only pieces do not end up as "".
        .filter(_.nonEmpty)
        // Locale.ROOT keeps the result independent of the JVM default locale.
        .foreach(element => merge_array += element.toUpperCase(java.util.Locale.ROOT))
    }
  }

  /**
   * Returns the larger of two string values, ignoring values that are null,
   * empty or the literal "null".
   *
   * When both values consist only of ASCII digits they are compared as
   * non-negative integers of arbitrary length, so "10" beats "9"; a plain
   * string comparison would pick "9". Any other pair is compared
   * lexicographically. On a tie the first value wins.
   *
   * @param time1_str first candidate; may be null
   * @param time2_str second candidate; may be null
   * @return the larger usable value, or "" when neither is usable
   */
  def mergeMax(time1_str: String, time2_str: String): String = {
    def usable(s: String): Boolean = s != null && s.nonEmpty && !s.equals("null")

    def digitsOnly(s: String): Boolean = s.forall(c => c >= '0' && c <= '9')

    // Integer comparison without parsing: drop leading zeros, then the longer
    // number is larger and equal lengths fall back to character order.
    def numericOrder(a: String, b: String): Int = {
      val x = a.dropWhile(_ == '0')
      val y = b.dropWhile(_ == '0')
      if (x.length != y.length) Integer.compare(x.length, y.length) else x.compareTo(y)
    }

    (usable(time1_str), usable(time2_str)) match {
      case (true, true) =>
        val order =
          if (digitsOnly(time1_str) && digitsOnly(time2_str)) numericOrder(time2_str, time1_str)
          else time2_str.compareTo(time1_str)
        if (order > 0) time2_str else time1_str
      case (true, false) => time1_str
      case (false, true) => time2_str
      case _ => ""
    }
  }

  /**
   * Picks the app name to keep when two records of the same package are merged.
   *
   * The record with the greater `update_time` (string comparison; a null or
   * empty time counts as "0") is consulted first, and on equal times the
   * first record is consulted first. The first non-null, non-empty `app_name`
   * found in that order is returned.
   *
   * @param json_1 first record
   * @param json_2 second record
   * @return the selected app name, or "" when neither record has one
   */
  def mergeApp_name(json_1: JSONObject, json_2: JSONObject): String = {
    def timeOrZero(record: JSONObject): String = {
      val stamp = record.getString("update_time")
      if (stamp == null || stamp.isEmpty) "0" else stamp
    }

    val secondIsNewer = timeOrZero(json_2).compareTo(timeOrZero(json_1)) > 0
    val preference = if (secondIsNewer) Seq(json_2, json_1) else Seq(json_1, json_2)

    preference
      .map(_.getString("app_name"))
      .find(name => name != null && name.nonEmpty)
      .getOrElse("")
  }

  /**
   * Returns the first of the two arguments that carries a real value.
   *
   * A value is skipped when it is null, empty, or equal to "null" after
   * lower-casing.
   *
   * @param str1 preferred value; may be null
   * @param str2 fallback value; may be null
   * @return `str1` if usable, otherwise `str2` if usable, otherwise ""
   */
  def mergeString(str1: String, str2: String): String = {
    def meaningful(candidate: String): Boolean =
      candidate != null && candidate.nonEmpty && candidate.toLowerCase != "null"

    Seq(str1, str2).find(meaningful).getOrElse("")
  }

}