package mobvista.dmp.datasource.age_gender

import java.net.URI

import mobvista.dmp.common.CommonSparkJob
import mobvista.dmp.datasource.age.mapreduce.Util
import org.apache.commons.cli.Options
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

/**
  * Spark job that reads device profile rows through one of two SQL templates
  * (`Constant.ga_sql_lr` when `business` is "ga", `Constant.dsp_profile_sql_lr`
  * otherwise) and writes two ORC datasets: one with age labels and one with
  * gender labels.
  *
  * Mandatory options: `date`, `business`, `ageOutput`, `genderOutput`.
  *
  * @author wangjf
  */
class GetAgeGender extends CommonSparkJob with Serializable {

  /**
    * Parses the command line and, when all mandatory options are present,
    * runs the extraction.
    *
    * @param args raw command-line arguments
    * @return 0 on success, -1 when `checkMustOption` rejects the command line
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)
      //  val coalesce = commandLine.getOptionValue("coalesce", "100")
      extractAgeGender(
        date = commandLine.getOptionValue("date"),
        business = commandLine.getOptionValue("business"),
        ageOutput = commandLine.getOptionValue("ageOutput"),
        genderOutput = commandLine.getOptionValue("genderOutput"))
    }
  }

  /**
    * Creates the Spark session, clears both output locations, runs the query
    * and writes the age and gender datasets. The session is always stopped,
    * including when clearing the outputs or running the query fails.
    *
    * @param date         value substituted for `@date`; its first four characters are parsed as the reference year
    * @param business     "ga" selects the GA template, anything else the DSP profile template
    * @param ageOutput    destination path of the age ORC dataset
    * @param genderOutput destination path of the gender ORC dataset
    * @return always 0; failures surface as exceptions
    */
  private def extractAgeGender(date: String, business: String, ageOutput: String, genderOutput: String): Int = {
    // Parsed before the session is created so that a malformed date fails fast.
    val now = date.substring(0, 4).toInt

    val spark = SparkSession.builder()
      .appName("GetAgeGender." + business)
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.kryo.registrationRequired", "false")
      // Kryo classes have to be declared before the context exists: SparkContext.getConf
      // returns a copy, so registering classes on it afterwards has no effect.
      .config("spark.kryo.classesToRegister", classOf[AgeGender].getName)
      .enableHiveSupport()
      .getOrCreate()

    try {
      // Resolve the filesystem from each output path itself so that outputs outside
      // the default bucket are handled as well.
      val hadoopConf = spark.sparkContext.hadoopConfiguration
      Seq(ageOutput, genderOutput).foreach { output =>
        val target = new Path(output)
        val fsUri: URI = target.toUri
        FileSystem.get(fsUri, hadoopConf).delete(target, true)
      }

      spark.udf.register("check_deviceId", mobvista.dmp.common.MobvistaConstant.checkDeviceId _)
      spark.udf.register("check_birthday", Logic.check_birthday _)
      spark.udf.register("check_gender", Logic.check_gender _)

      // The two sources differ in the SQL template and in the name of the birth column.
      val isGa = business == "ga"
      val template = if (isGa) Constant.ga_sql_lr else Constant.dsp_profile_sql_lr.replace("@type", "dsp")
      val birthColumn = if (isGa) "birth_year" else "birthday"
      val sql = template.replace("@date", date)
        .replace("@check_deviceId", "check_deviceId(device_id)")
        .replace("@check_birthday", s"check_birthday($now,$birthColumn)")
        .replace("@check_gender", "check_gender(gender)")

      // Cached because the same rows feed both the age and the gender output.
      val devices = spark.sql(sql)
        .rdd
        .map(row =>
          AgeGender(row.getAs("device_id"), row.getAs("device_type"), row.getAs("birthday"), row.getAs("gender"), row.getAs("tag")))
        .persist(StorageLevel.MEMORY_ONLY_SER)

      //  process_age
      // NOTE(review): the meaning of the constant "A" column is defined by
      // Constant.schema_age_gender, which is not visible here - confirm.
      val ageRows = devices
        .filter(device => Logic.check_birthday(now, device.birthday))
        .map { device =>
          val label = Util.calcLabel(now - device.birthday.toInt).toString
          Row(device.device_id, "A", label, device.tag, device.device_type)
        }
      writeAgeGenderOrc(spark, ageRows, ageOutput)

      //  process_gender
      val genderRows = devices
        .filter(device => Logic.check_gender(device.gender))
        .map(device => Row(device.device_id, "A", device.gender.toLowerCase, device.tag, device.device_type))
      writeAgeGenderOrc(spark, genderRows, genderOutput)

      0
    } finally {
      // Stopping the session also stops its underlying SparkContext.
      spark.stop()
    }
  }

  /**
    * Writes `rows` to `output` as zlib-compressed ORC using
    * `Constant.schema_age_gender`, overwriting existing data.
    */
  private def writeAgeGenderOrc(spark: SparkSession, rows: org.apache.spark.rdd.RDD[Row], output: String): Unit =
    spark.createDataFrame(rows, Constant.schema_age_gender)
      .write.mode(SaveMode.Overwrite)
      .option("orc.compress", "zlib")
      .orc(output)

  /**
    * Declares the command-line options understood by this job.
    *
    * @return the option set containing `date`, `business`, `genderOutput` and `ageOutput`
    */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("date", true, "[must] date")
    options.addOption("business", true, "[must] business")
    options.addOption("genderOutput", true, "[must] genderOutput")
    options.addOption("ageOutput", true, "[must] ageOutput")
    //  options.addOption("coalesce", true, "[must] coalesce")
    options
  }
}

object GetAgeGender {
  /**
    * Command-line entry point: runs the job and propagates a non-zero status
    * from `run` as the process exit code, so that a run rejected for missing
    * options is not reported as successful.
    *
    * @param args raw command-line arguments, forwarded unchanged to `run`
    */
  def main(args: Array[String]): Unit = {
    val exitCode = new GetAgeGender().run(args)
    // Only exit explicitly on failure; a successful run returns normally.
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}