package mobvista.dmp.datasource.age_gender

import java.net.URI
import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.Options
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

/**
 * Spark job that turns per-device gender prediction scores into gender labels.
 *
 * Steps performed by [[run]]:
 *  1. read a tab-separated threshold dictionary (key, value) and broadcast it;
 *  2. read the prediction input, one `deviceId#deviceType<TAB>score` record per line,
 *     and label every record through [[calGenderLogic]];
 *  3. expose the labelled rows as the temp view `ods_gender_prediction_tab`;
 *  4. run `Constant.dmp_device_gender` (with `@date` substituted) and write the result
 *     as zlib-compressed ORC to the output path.
 */
class MergeDeviceGenderLR extends CommonSparkJob with Serializable {
  val wellSplit = "#"
  val TAB_DELIMITER = "\t"

  /**
   * Parses the command line and executes the job.
   *
   * @param args command-line arguments; see [[buildOptions]] for the accepted options
   * @return 0 on success, -1 when a mandatory option is missing
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)

      val date = commandLine.getOptionValue("date")
      val gender_predict_input = commandLine.getOptionValue("gender_predict_input")
      val gender_device_output = commandLine.getOptionValue("gender_device_output")
      val gender_threshold_dict_input = commandLine.getOptionValue("gender_threshold_dict_input")
      // Parse eagerly so an invalid value fails before any Spark work is started.
      val parallelism = commandLine.getOptionValue("parallelism").toInt

      val spark = SparkSession.builder()
        .appName("MergeDeviceGenderLR")
        .config("spark.rdd.compress", "true")
        .config("spark.io.compression.codec", "snappy")
        .config("spark.sql.broadcastTimeout", "2400")
        .config("spark.sql.autoBroadcastJoinThreshold", "209715200")
        .config("spark.sql.orc.filterPushdown", "true")
        .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .enableHiveSupport()
        .getOrCreate()
      val sc = spark.sparkContext

      try {
        // Resolve the file system from the output path itself rather than from a fixed
        // bucket, so the cleanup always targets the location that is written below.
        val outputPath = new Path(gender_device_output)
        outputPath.getFileSystem(sc.hadoopConfiguration).delete(outputPath, true)

        // Lines with fewer than two tab-separated fields (e.g. blank lines) are skipped;
        // calGenderLogic falls back to built-in defaults for any key that is absent.
        val gender_thdict = sc.textFile(gender_threshold_dict_input)
          .map(_.split("\t"))
          .filter(_.length >= 2)
          .map(r => (r(0), r(1)))
          .collectAsMap()

        val genderMap = sc.broadcast(gender_thdict)

        val gender_rdd = sc.textFile(gender_predict_input)
          .mapPartitions(calGenderLogic(_, genderMap))

        spark.createDataFrame(gender_rdd, Constant.gender_schema)
          .createOrReplaceTempView("ods_gender_prediction_tab")

        val ods_gender_sql = Constant.dmp_device_gender.replace("@date", date)

        spark.sql(ods_gender_sql)
          .coalesce(parallelism)
          .write.mode(SaveMode.Overwrite)
          .option("orc.compress", "zlib")
          .orc(gender_device_output)
      } finally {
        // Stopping the session also stops its underlying SparkContext.
        spark.stop()
      }
      0
    }
  }

  /**
   * Labels every prediction record of one partition with a gender.
   *
   * Each input line is `deviceId#deviceType<TAB>score`. The score is multiplied by 1000
   * and compared with thresholds taken from the broadcast dictionary (defaults are used
   * for missing keys: B=663, ALOW=646, AHIGH=647, APERCENTAGE=0.287481121164):
   *  - device type "B" (Android, reported as "gaid"): "f" when below B, otherwise "m";
   *  - device type "A" (iOS, reported as "idfa"): "f" when below ALOW, "m" when above
   *    AHIGH, and in between a random draw yields "f" with probability of roughly
   *    APERCENTAGE and "m" otherwise;
   *  - any other device type: reported as "idfa" / "m".
   *
   * A line without the expected separators, or an "A"/"B" record whose score is not a
   * number, raises an exception.
   *
   * @param rows       raw prediction lines of a single partition
   * @param bGenderMap broadcast threshold dictionary
   * @return rows of (deviceId, deviceType, gender, "calc")
   */
  def calGenderLogic(rows: Iterator[String], bGenderMap: Broadcast[scala.collection.Map[String, String]]): Iterator[Row] = {
    val thresholdMap = bGenderMap.value
    val bTd = thresholdMap.getOrElse("B", "663").toFloat
    val aLowBTd = thresholdMap.getOrElse("ALOW", "646").toFloat
    val aHighBTd = thresholdMap.getOrElse("AHIGH", "647").toFloat
    val aPerBTd = thresholdMap.getOrElse("APERCENTAGE", "0.287481121164").toFloat

    // One generator per partition instead of a new instance for every row.
    val random = new scala.util.Random

    rows.map { line =>
      val fields = line.split(TAB_DELIMITER)
      val idParts = fields(0).split(wellSplit)
      val deviceId = idParts(0)
      val deviceType = idParts(1) // "A" is iOS (idfa), "B" is Android (gaid)
      val percentage = fields(1)

      val (deviceTypeName, gender) =
        if ("B".equalsIgnoreCase(deviceType)) {
          val score = percentage.toFloat * 1000
          ("gaid", if (score < bTd) "f" else "m")
        } else if ("A".equalsIgnoreCase(deviceType)) {
          val score = percentage.toFloat * 1000
          val label =
            if (score < aLowBTd) {
              "f"
            } else if (score <= aHighBTd) {
              // Ambiguous band: decide randomly, weighted by APERCENTAGE.
              if (random.nextInt(100) > aPerBTd * 100) "m" else "f"
            } else {
              "m"
            }
          ("idfa", label)
        } else {
          ("idfa", "m")
        }

      Row(deviceId, deviceTypeName, gender, "calc")
    }
  }

  /**
   * Declares the command-line options of this job; each takes one argument and is
   * described as mandatory in its help text.
   */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("date", true, "[must] date")
    options.addOption("gender_predict_input", true, "[must] gender_predict_input")
    options.addOption("gender_device_output", true, "[must] gender_device_output")
    options.addOption("gender_threshold_dict_input", true, "[must] gender_threshold_dict_input")
    options.addOption("parallelism", true, "[must] parallelism")
    options
  }
}


/** Command-line entry point for the [[MergeDeviceGenderLR]] job. */
object MergeDeviceGenderLR {

  /**
   * Instantiates the job and runs it with the given arguments.
   * The status code returned by `run` is discarded.
   *
   * @param args command-line arguments forwarded unchanged to the job
   */
  def main(args: Array[String]): Unit = {
    val job = new MergeDeviceGenderLR()
    job.run(args)
  }
}
