package mobvista.dmp.datasource.newtag

import java.net.URI
import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.Options
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.poi.ss.usermodel.{Cell, WorkbookFactory}
import org.apache.spark.sql.SparkSession

import java.io.InputStream
import scala.collection.mutable.ArrayBuffer
//  import scala.tools.nsc.interpreter.InputStream

/**
 * Spark job that merges an Excel sheet of campaign tags into the rows of
 * `dwh.dim_campaign_tags` and writes the merged result as gzip-compressed text.
 *
 * Command-line options (declared in `buildOptions`):
 *  - `newPath`: path of the Excel workbook holding the new / updated rows
 *  - `output`:  directory the merged rows are written to (deleted first if present)
 */
class ImportCampaignTags extends CommonSparkJob with Serializable {

  /**
   * Parses the arguments, loads the workbook, merges it with the existing
   * dimension rows and stores the result.
   *
   * @param args raw command-line arguments
   * @return 0 on success; 1 when a required option is missing or the workbook
   *         at `newPath` does not exist
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      return 1
    }
    printOptions(commandLine)

    val output = commandLine.getOptionValue("output")
    val newPath = commandLine.getOptionValue("newPath")

    val spark = SparkSession
      .builder()
      .appName("dmp_ImportCampaignTags_fengliang")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.orc.cacheMetadata", "true")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._
    val sc = spark.sparkContext

    try {
      val excelPath = new Path(newPath)
      val inputFs = FileSystem.get(URI.create(newPath), sc.hadoopConfiguration)
      if (!inputFs.exists(excelPath)) {
        // Make the reason for the non-zero status visible in the driver log.
        System.err.println(s"Excel file not found, nothing to import: $newPath")
        return 1
      }

      // The stream is opened here, so it is closed here once parsing is done.
      val excelStream = inputFs.open(excelPath)
      val dailyData = try parseExcel(excelStream) finally excelStream.close()

      // Rows without a campaign id carry no usable key and are dropped.
      sc.makeRDD(dailyData)
        .filter(x => StringUtils.isNotEmpty(x.campaign_id))
        .toDF()
        .createOrReplaceTempView("t_data_daily")

      // First branch: existing rows that the sheet does not override.
      // Second branch: the sheet rows, with the tag derived from the comment label.
      // The outer group by removes duplicate rows.
      val sql =
        """
          |select a.campaign_id, a.platform, a.tag, a.comment
          |from (
          |  select a.campaign_id, a.platform, a.tag, a.comment
          |  from dwh.dim_campaign_tags a
          |  left outer join t_data_daily b
          |    on a.campaign_id=b.campaign_id and a.platform=b.platform
          |  where b.campaign_id is null
          |
          |  union all
          |
          |  select a.campaign_id, a.platform,
          |  case when trim(a.comment)='重度游戏' then 'hardcoregame'
          |   when trim(a.comment)='轻度游戏' then 'lightgame'
          |   when trim(a.comment)='独立游戏' then 'indiegame'
          |   else 'other' end as tag,
          |  a.comment
          |  from t_data_daily a
          |) a
          |group by a.campaign_id, a.platform, a.tag, a.comment
        """.stripMargin

      // Resolve the file system from the output path itself instead of a fixed
      // bucket, so the delete works wherever `output` points. The directory must
      // be gone before saveAsTextFile, which refuses to write into an existing one.
      val outputPath = new Path(output)
      outputPath.getFileSystem(sc.hadoopConfiguration).delete(outputPath, true)

      // Copy the separator to a local so the closure below does not capture `this`.
      val separator = DATA_SPLIT
      spark.sql(sql)
        .map(_.mkString(separator))
        .rdd
        .saveAsTextFile(output, classOf[GzipCodec])

      0
    } finally {
      spark.stop()
    }
  }

  /**
   * Reads the first sheet of an Excel workbook into [[DimCampainTags]] records.
   *
   * The first row (index 0) is skipped as a header. Columns 0, 1 and 2 are read
   * as campaign id, platform and comment; each value is trimmed. Rows that the
   * sheet does not define at all (POI returns null for them) are skipped.
   *
   * The workbook is closed before returning; closing `inputStream` remains the
   * caller's responsibility.
   *
   * @param inputStream stream positioned at the start of the workbook
   * @return one record per defined data row, in sheet order
   */
  def parseExcel(inputStream: InputStream): Array[DimCampainTags] = {
    val workbook = WorkbookFactory.create(inputStream, "")
    try {
      val sheet = workbook.getSheetAt(0)
      (1 to sheet.getLastRowNum)
        .flatMap(rowIndex => Option(sheet.getRow(rowIndex)))
        .map { row =>
          DimCampainTags(
            getCellValue(row.getCell(0)).trim,
            getCellValue(row.getCell(1)).trim,
            getCellValue(row.getCell(2)).trim)
        }
        .toArray
    } finally {
      workbook.close()
    }
  }

  /**
   * Returns the content of a cell as text.
   *
   * @param cell the cell to read; may be null when the row has no such column
   * @return an empty string for a null cell; the value truncated to a whole
   *         number for numeric cells; otherwise the cell's string value
   *         (never null)
   */
  def getCellValue(cell: Cell): String = {
    if (cell == null) {
      ""
    } else if (cell.getCellType == 0) {
      // Cell type 0 is POI's numeric cell type; ids typed as numbers in the
      // sheet are rendered without a decimal part.
      String.valueOf(cell.getNumericCellValue.toLong)
    } else {
      // NOTE(review): POI's getStringCellValue rejects some non-text cell types
      // (e.g. boolean) with an exception - confirm the sheet only holds text/numbers.
      Option(cell.getStringCellValue).getOrElse("")
    }
  }

  /**
   * Declares the command-line options understood by this job.
   *
   * @return the option set containing `newPath` and `output`
   */
  override protected def buildOptions(): Options = {
    // Named `opts` so it does not shadow the inherited `options` used in `run`.
    val opts = new Options
    opts.addOption("newPath", true, "[must] new excel file path")
    opts.addOption("output", true, "[must] output path")
    opts
  }
}

/** Command-line entry point for the [[ImportCampaignTags]] job. */
object ImportCampaignTags {

  /**
   * Creates a job instance and runs it with the given arguments.
   * The status code returned by `run` is not propagated to the process.
   *
   * @param args raw command-line arguments, passed through unchanged
   */
  def main(args: Array[String]): Unit = {
    val job = new ImportCampaignTags()
    job.run(args)
  }
}

/**
 * One data row of the campaign-tag workbook.
 *
 * The field names double as column names when instances are converted to a
 * DataFrame, so they must match the names used in the job's SQL.
 *
 * @param campaign_id value of the first sheet column
 * @param platform    value of the second sheet column
 * @param comment     value of the third sheet column; the job's SQL derives
 *                    the tag from this label
 */
case class DimCampainTags(
  campaign_id: String,
  platform: String,
  comment: String
)