package mobvista.dmp.datasource.newtag

import java.net.URI
import mobvista.dmp.common.CommonSparkJob
import org.apache.commons.cli.Options
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.poi.ss.usermodel.{Cell, WorkbookFactory}
import org.apache.spark.sql.SparkSession

import java.io.InputStream
import scala.collection.mutable.ArrayBuffer
//  import scala.tools.nsc.interpreter.InputStream

/**
  * Spark job that merges package tags read from an Excel workbook into the existing
  * `dwh.dim_package_tags` data and writes the merged rows as gzip-compressed text.
  *
  * Command-line options (see `buildOptions`):
  *  - `newPath`: location of the Excel workbook holding the new tags
  *  - `output`: directory that receives the merged result; it is deleted first if it exists
  */
class ImportPkgTags extends CommonSparkJob with Serializable {

  /**
    * Parses the command line, loads the workbook, merges it with the existing tags and
    * writes the result.
    *
    * @param args raw command-line arguments
    * @return 0 on success; 1 when a mandatory option is missing or the workbook at
    *         `newPath` does not exist
    */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      1
    } else {
      printOptions(commandLine)

      val output = commandLine.getOptionValue("output")
      val newPath = commandLine.getOptionValue("newPath")

      val spark = SparkSession
        .builder()
        .appName("dmp_ImportPkgTags_fengliang")
        .config("spark.rdd.compress", "true")
        .config("spark.io.compression.codec", "snappy")
        .config("spark.sql.orc.filterPushdown", "true")
        .config("spark.sql.orc.cacheMetadata", "true")
        .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .enableHiveSupport()
        .getOrCreate()

      try {
        if (registerDailyData(spark, newPath)) {
          writeMergedTags(spark, output)
          0
        } else {
          // The workbook is missing: nothing to import.
          1
        }
      } finally {
        // Always release the session, including on the "workbook missing" path.
        spark.stop()
      }
    }
  }

  /**
    * Reads the workbook at `newPath` and registers its rows that have a non-empty
    * package name as the temp view `t_data_daily`.
    *
    * @param spark   active session
    * @param newPath location of the Excel workbook
    * @return `true` when the view was registered, `false` when the file does not exist
    */
  private def registerDailyData(spark: SparkSession, newPath: String): Boolean = {
    import spark.implicits._
    val sc = spark.sparkContext
    val fs = FileSystem.get(URI.create(newPath), sc.hadoopConfiguration)
    val filePath = new Path(newPath)
    if (!fs.exists(filePath)) {
      false
    } else {
      val in = fs.open(filePath)
      // The stream is opened here, so it is closed here once parsing has finished.
      val dailyData = try parseExcel(in) finally in.close()
      sc.makeRDD(dailyData)
        .filter(x => StringUtils.isNotEmpty(x.package_name))
        .toDF()
        .createOrReplaceTempView("t_data_daily")
      true
    }
  }

  /**
    * Builds the merged tag set from `t_data_daily` and the warehouse tables and writes it
    * to `output` as gzip-compressed text, one row per line with fields joined by
    * `DATA_SPLIT`.
    *
    * @param spark  active session in which `t_data_daily` has been registered
    * @param output destination directory; removed recursively before writing
    */
  private def writeMergedTags(spark: SparkSession, output: String): Unit = {
    import spark.implicits._
    val sc = spark.sparkContext

    // Unpivot tag1..tag5 into one row per non-empty tag and de-duplicate.
    val packageTagsSql =
      """
        |select package_name, platform, tag_type, tag
        |from (
        |  select package_name, platform, tag_type, tag1 as tag
        |  from t_data_daily t
        |  union all
        |  select package_name, platform, tag_type, tag2 as tag
        |  from t_data_daily t
        |  union all
        |  select package_name, platform, tag_type, tag3 as tag
        |  from t_data_daily t
        |  union all
        |  select package_name, platform, tag_type, tag4 as tag
        |  from t_data_daily t
        |  union all
        |  select package_name, platform, tag_type, tag5 as tag
        |  from t_data_daily t
        |) t
        |where t.tag is not null and t.tag <> ''
        |group by package_name, platform, tag_type, tag
      """.stripMargin
    spark.sql(packageTagsSql)
      .createOrReplaceTempView("t_package_tags")

    // Resolve each tag to its first/second level tag through dwh.dim_category_new and map
    // the workbook's Chinese tag-type labels to the fixed keys category / theme / style /
    // experience; any other label becomes 'other'.
    val twoLevelTagsSql =
      """
        |select /*+ MAPJOIN(a) */ b.package_name, b.platform,
        |case when trim(b.tag_type)='分类' then 'category'
        |     when trim(b.tag_type)='题材' then 'theme'
        |     when trim(b.tag_type)='美术风格' then 'style'
        |     when trim(b.tag_type)='游戏体验' then 'experience'
        |     else 'other'
        | end as tag_type,
        |a.first_tag, a.second_tag, b.tag as comment
        |from dwh.dim_category_new a
        |join t_package_tags b on a.comment=b.tag
      """.stripMargin
    spark.sql(twoLevelTagsSql)
      .createOrReplaceTempView("t_package_two_tags")

    // Keep existing rows whose (package_name, tag_type) is absent from the new data, add
    // all new rows, then de-duplicate.
    val mergedSql =
      """
        |select a.package_name, a.platform, a.tag_type, a.first_tag, a.second_tag, a.comment
        |from (
        |  select a.package_name, a.platform, a.tag_type, a.first_tag, a.second_tag, a.comment
        |  from dwh.dim_package_tags a
        |  left outer join t_package_two_tags b
        |    on a.package_name=b.package_name and a.tag_type=b.tag_type
        |  where b.package_name is null
        |  
        |  union all
        |  
        |  select a.package_name, a.platform, a.tag_type, a.first_tag, a.second_tag, a.comment
        |  from t_package_two_tags a
        |) a
        |group by a.package_name, a.platform, a.tag_type, a.first_tag, a.second_tag, a.comment
      """.stripMargin

    // Resolve the file system from the output path itself, so the directory that is
    // cleared is the same one saveAsTextFile writes to, whatever bucket or scheme it uses.
    val outputPath = new Path(output)
    outputPath.getFileSystem(sc.hadoopConfiguration).delete(outputPath, true)

    spark.sql(mergedSql)
      .map(_.mkString(DATA_SPLIT))
      .rdd
      .saveAsTextFile(output, classOf[GzipCodec])
  }

  /**
    * Parses the first sheet of an Excel workbook into [[DimPackageTags]] records.
    *
    * Row 0 is skipped (treated as a header) and rows the sheet reports as missing are
    * ignored. Columns 0..7 map to package name, platform (lower-cased), tag type and
    * tag1..tag5; every value is trimmed. The workbook is opened with an empty password.
    *
    * The workbook is closed before returning; closing `inputStream` is left to the caller.
    *
    * @param inputStream stream positioned at the start of the workbook
    * @return one record per existing data row, in sheet order
    */
  def parseExcel(inputStream: InputStream): Array[DimPackageTags] = {
    val workbook = WorkbookFactory.create(inputStream, "")
    try {
      val sheet = workbook.getSheetAt(0)
      val records = for {
        rowIndex <- 1 to sheet.getLastRowNum
        row <- Option(sheet.getRow(rowIndex)) // getRow returns null for rows that were never defined
      } yield {
        def text(column: Int): String = getCellValue(row.getCell(column)).trim

        DimPackageTags(text(0), text(1).toLowerCase(), text(2),
          text(3), text(4), text(5), text(6), text(7))
      }
      records.toArray
    } finally {
      workbook.close()
    }
  }

  /**
    * Returns the textual content of a cell.
    *
    * @param cell the cell to read; may be `null`
    * @return `""` for a `null` cell; for a numeric cell, the value truncated to a `Long`
    *         and rendered in decimal; otherwise the cell's string value
    */
  def getCellValue(cell: Cell): String =
    Option(cell) match {
      case None => ""
      case Some(present) =>
        present.getCellType match {
          // 0 is the numeric cell-type code in POI's int-based cell-type API.
          case 0 => String.valueOf(present.getNumericCellValue.toLong)
          // NOTE(review): getStringCellValue is expected to fail for non-text cells such
          // as booleans; confirm whether such cells can occur in the workbook.
          case _ => present.getStringCellValue
        }
    }

  /**
    * Declares the command-line options accepted by this job.
    *
    * @return options containing `newPath` and `output`, each taking one argument
    */
  override protected def buildOptions(): Options =
    new Options()
      .addOption("newPath", true, "[must] new excel file path")
      .addOption("output", true, "[must] output path")
}

object ImportPkgTags {

  /**
    * Command-line entry point: creates the job and runs it with the given arguments.
    * The job's integer result is not propagated as a process exit code.
    *
    * @param args raw command-line arguments, forwarded unchanged to the job
    */
  def main(args: Array[String]): Unit = {
    val job = new ImportPkgTags
    job.run(args)
  }
}

/**
  * One data row of the package-tag workbook, as produced by `ImportPkgTags.parseExcel`.
  *
  * @param package_name value of column 0
  * @param platform     value of column 1, lower-cased by the parser
  * @param tag_type     value of column 2
  * @param tag1         value of column 3
  * @param tag2         value of column 4
  * @param tag3         value of column 5
  * @param tag4         value of column 6
  * @param tag5         value of column 7
  */
case class DimPackageTags(
  package_name: String,
  platform: String,
  tag_type: String,
  tag1: String,
  tag2: String,
  tag3: String,
  tag4: String,
  tag5: String
)