package mobvista.dmp.datasource.app_info_tag
import org.apache.spark.sql.{SparkSession, _}
import org.apache.spark.sql.types._
/**
 * Spark job that converts the "3s" Android app-info dump into an ORC data set.
 *
 * Input: a text file at a fixed S3 location, parsed as one JSON object per line.
 * Output: ORC files written (overwriting) to the path given by the Spark conf key
 * `spark.app.output_path`, with every column stored as a nullable string.
 *
 * The conf key `spark.app.loadTime` was previously read but never used, so it is
 * no longer required by this job.
 */
object App_info_adr_3s extends Serializable {

  import com.alibaba.fastjson.{JSON, JSONArray, JSONObject}
  import scala.collection.JavaConverters._

  /** Fixed location of the input dump. */
  // An earlier revision pointed at ".../dmp_dummper/3s_android.json.log" instead.
  private val AndroidFile = "s3://mob-emr-test/leo.liang/dmp_dummper/all_3s_android.json.log"

  /** Upper bound on the number of partitions (and thus output files) written. */
  private val OutputPartitions = 100

  /**
   * Entry point.
   *
   * @param args command-line arguments; not used, all settings come from the Spark conf
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .getOrCreate()
    val outputPath = spark.conf.get("spark.app.output_path")

    try {
      val rows = spark.sparkContext.textFile(AndroidFile).map(parseLine)

      val schema = StructType(Array(
        StructField("package_name", StringType, true),
        StructField("app_name", StringType, true),
        StructField("category_list", StringType, true),
        StructField("user_rating_count", StringType, true),
        StructField("download", StringType, true),
        StructField("track_view_url", StringType, true),
        StructField("update_time", StringType, true),
        StructField("artist_name", StringType, true),
        StructField("content_advisory_rating", StringType, true)
      ))

      spark.createDataFrame(rows, schema)
        .coalesce(OutputPartitions)
        .write
        .format("orc")
        .mode(SaveMode.Overwrite)
        .save(outputPath)
    } catch {
      // NOTE(review): the failure is only printed, so the driver still exits normally
      // and a scheduler cannot tell that nothing was written. Kept as-is because
      // callers may rely on it; consider rethrowing after logging.
      case e: Exception =>
        e.printStackTrace()
    } finally {
      // Stop the context on every exit path, including errors the catch does not handle.
      spark.sparkContext.stop()
    }
  }

  /**
   * Converts one input line into a row matching the output schema.
   *
   * Every field is read with `getString` so that JSON numbers (for example a numeric
   * `number_ratings` or `downloads`) become strings; the schema declares all columns
   * as `StringType`, and Spark rejects non-string values for such columns.
   *
   * @param line one line of the input file, expected to hold a JSON object
   * @return a row with nine string (or null) values in schema order
   */
  private def parseLine(line: String): Row = {
    val json = JSON.parseObject(line)

    // The category is emitted as a one-element JSON array: [{"code": ..., "name": ...}].
    val category = new JSONObject()
    category.put("code", json.getString("cat_key"))
    category.put("name", json.getString("category"))
    val categoryList = new JSONArray()
    categoryList.add(category)

    Row(
      json.getString("package_name"),
      distinctAppNames(json.getJSONObject("lang_data")),
      categoryList.toString(),
      json.getString("number_ratings"),
      json.getString("downloads"),
      json.getString("market_url"),
      formatEpochSeconds(json.getLong("market_update")),
      json.getString("developer"),
      json.getString("content_rating")
    )
  }

  /**
   * Collects the `app_name` of every entry in `lang_data` and joins the distinct
   * names with commas, keeping the order in which they are first encountered.
   *
   * Names are compared for exact equality. (A substring test would wrongly drop a
   * name such as "Foo" when "Foo Lite" had already been seen.) Null or empty names
   * and null entries are skipped.
   *
   * @param langData the `lang_data` object of a record; may be null
   * @return the comma-separated distinct names, or "" when there are none
   */
  private def distinctAppNames(langData: JSONObject): String =
    Option(langData).fold("") { data =>
      data.entrySet().asScala.toList.flatMap { entry =>
        for {
          value  <- Option(entry.getValue)
          parsed <- Option(JSON.parseObject(value.toString))
          name   <- Option(parsed.getString("app_name"))
          if name.nonEmpty
        } yield name
      }.distinct.mkString(",")
    }

  /**
   * Formats a timestamp given in seconds since the epoch as `yyyyMMdd`.
   *
   * The formatter uses the JVM's default time zone. A new formatter is created per
   * call because `SimpleDateFormat` is not thread-safe.
   *
   * @param seconds epoch seconds; may be null when the field is missing
   * @return the formatted date, or null when `seconds` is null
   */
  private def formatEpochSeconds(seconds: java.lang.Long): String =
    Option(seconds)
      .map { s =>
        new java.text.SimpleDateFormat("yyyyMMdd").format(new java.util.Date(s.longValue * 1000L))
      }
      .orNull

}