package mobvista.dmp.datasource.app_info_tag
import com.alibaba.fastjson.{JSONArray, JSONObject}
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SparkSession, _}
import org.apache.spark.sql.types._
/**
 * Spark job that copies one daily partition of `dwh.dim_app_info_ios` to ORC files,
 * together with a few small helpers for merging app-info fields.
 *
 * Job parameters are read from the Spark configuration:
 *  - `spark.app.loadTime`: its first eight characters are used as the year (4),
 *    month (2) and day (2) partition values
 *  - `spark.app.etlTime`: inclusive upper bound for the `update_time` column
 *  - `spark.app.output_path`: target directory; existing content is overwritten
 */
object Etl_app_info_ios_repair {

  /**
   * Names given to the first 13 columns of `select *`, in positional order.
   * NOTE(review): this assumes the table's column order matches this list - confirm
   * against the Hive DDL.
   */
  private val outputColumns: Seq[String] = Seq(
    "package_name",
    "app_name",
    "file_size_bytes",
    "support_language",
    "bundle_id",
    "release_date",
    "primary_category",
    "primary_category_id",
    "category_list",
    "category_id_list",
    "user_rating_count",
    "track_view_url",
    "update_time")

  /**
   * Job entry point: reads the partition selected by `spark.app.loadTime`, keeps rows with
   * `update_time <= spark.app.etlTime`, and writes the first 13 columns as ORC (10 files at most)
   * to `spark.app.output_path`.
   *
   * Any failure propagates to the caller so the application is reported as failed instead of
   * silently finishing without output. The Spark session is always stopped.
   *
   * @param args unused; all parameters come from the Spark configuration
   */
  def main(args: Array[String]): Unit = {
    // Serializer, Kryo buffers, spark.hadoop.* and speculation are read when the SparkContext
    // starts, so they have to be on the SparkConf; setting them on the running session has no
    // effect. setIfMissing keeps any value supplied at submit time.
    val conf = new SparkConf()
      .setIfMissing("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setIfMissing("spark.kryoserializer.buffer", "128m")
      .setIfMissing("spark.kryoserializer.buffer.max", "300m")
      .setIfMissing("spark.hadoop.mapreduce.input.fileinputformat.split.minsize", "10649600")
      .setIfMissing("spark.hadoop.mapreduce.input.fileinputformat.split.maxsize", "3145721600")
      .setIfMissing("spark.speculation", "true")
      .setIfMissing("spark.speculation.interval", "500ms")
      .setIfMissing("spark.speculation.multiplier", "5")

    val spark = SparkSession.builder().appName("JdbcOperation").config(conf)
      .enableHiveSupport()
      .getOrCreate()

    try {
      val loadTime = spark.conf.get("spark.app.loadTime")
      val etlTime = spark.conf.get("spark.app.etlTime")
      val output_path = spark.conf.get("spark.app.output_path")
      require(
        loadTime != null && loadTime.length >= 8,
        s"spark.app.loadTime must start with yyyyMMdd, got '$loadTime'")

      val year = loadTime.substring(0, 4)
      val month = loadTime.substring(4, 6)
      val day = loadTime.substring(6, 8)

      val ios_schema = StructType(outputColumns.map(StructField(_, StringType, nullable = true)))
      // Plain Int so the closure below does not capture anything but a primitive.
      val columnCount = ios_schema.length

      val query = s"select * from dwh.dim_app_info_ios where year='$year' and month='$month' " +
        s"and day='$day' and update_time<='$etlTime'"

      // Keep only the leading columns; whatever `select *` returns after them is dropped.
      val ios_rdd = spark.sql(query).rdd.map { p =>
        Row.fromSeq((0 until columnCount).map(p.get))
      }

      spark.createDataFrame(ios_rdd, ios_schema)
        .coalesce(10)
        .write
        .format("orc")
        .mode(SaveMode.Overwrite)
        .save(output_path)
    } finally {
      spark.stop()
    }
  }

  /**
   * Adds the JSON text of every object in `array` to `merge_array`.
   *
   * A null array or a null element is skipped. Objects are added regardless of which keys
   * they contain.
   *
   * @param array       JSON array whose elements are JSON objects
   * @param merge_array set that receives each element's `toString` form
   */
  def mergeJSONArray(array: JSONArray, merge_array: scala.collection.mutable.Set[String]): Unit = {
    if (array != null) {
      for (i <- 0 until array.size()) {
        Option(array.getJSONObject(i)).foreach(obj => merge_array.add(obj.toString()))
      }
    }
  }

  /**
   * Parses a bracketed, comma-separated list such as `[a,b,c]` and adds every non-empty
   * item to `merge_array`. Items are taken verbatim (no trimming, no unquoting).
   *
   * Input that is null or not wrapped in `[` ... `]` is ignored.
   *
   * @param array_str   text of the form `[item,item,...]`
   * @param merge_array set that receives the items
   */
  def mergeArrayStr(array_str: String, merge_array: scala.collection.mutable.Set[String]): Unit = {
    if (array_str != null && array_str.startsWith("[") && array_str.endsWith("]")) {
      array_str
        .substring(1, array_str.length - 1)
        .split(",")
        .filter(_.nonEmpty)
        .foreach(item => merge_array.add(item))
    }
  }

  /**
   * Returns the greater of two strings by `String.compareTo`, ignoring null and empty values.
   *
   * @param time1_str first candidate, may be null or empty
   * @param time2_str second candidate, may be null or empty
   * @return the greater non-empty candidate, or `""` when both are null or empty
   */
  def mergeMax(time1_str: String, time2_str: String): String =
    Seq(time1_str, time2_str)
      .filter(s => s != null && s.nonEmpty)
      .reduceOption((left, right) => if (right.compareTo(left) > 0) right else left)
      .getOrElse("")

  /**
   * Chooses an `app_name` from two records, preferring the one with the greater `update_time`.
   *
   * A missing or empty `update_time` counts as `"0"`; times are compared as strings. When the
   * preferred record has no usable `app_name`, the other record's name is used; when the times
   * are equal, `json_1` is preferred. A null record is treated as having no fields.
   *
   * @param json_1 first record
   * @param json_2 second record
   * @return the selected non-empty `app_name`, or `""` when neither record has one
   */
  def mergeApp_name(json_1: JSONObject, json_2: JSONObject): String = {
    // Reads a string field, mapping a null record, a null value and "" all to None.
    def field(json: JSONObject, key: String): Option[String] =
      Option(json).flatMap(j => Option(j.getString(key))).filter(_.nonEmpty)

    val time1 = field(json_1, "update_time").getOrElse("0")
    val time2 = field(json_2, "update_time").getOrElse("0")
    val name1 = field(json_1, "app_name")
    val name2 = field(json_2, "app_name")

    val chosen =
      if (time2.compareTo(time1) > 0) name2.orElse(name1)
      else name1.orElse(name2)
    chosen.getOrElse("")
  }

}