package mobvista.dmp.output.reyun

import mobvista.dmp.common.{CommonSparkJob, MobvistaConstant}
import org.apache.commons.cli.Options
import org.apache.spark.sql.SaveMode

/**
 * Spark job that runs the `Constant.user_info` SQL for a single date and writes the
 * result to the given output location as ORC files.
 *
 * @package: mobvista.dmp.output.reyun
 * @author: wangjf
 * @date: 2021/9/14
 * @time: 2:06 PM
 * @email: jinfeng.wang@mobvista.com
 */
class UserInfo extends CommonSparkJob {

  /**
   * Declares the command-line options understood by this job.
   *
   * @return options `date`, `output` and `coalesce`, each taking one argument
   */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("date", true, "[must] date")
    options.addOption("output", true, "[must] output")
    options.addOption("coalesce", true, "[must] coalesce")
    options
  }

  /**
   * Parses the arguments, executes the user-info query and writes the result as ORC.
   *
   * @param args raw command-line arguments
   * @return 0 when the write completed, -1 when the option check failed
   *         (usage is printed and no Spark session is created in that case)
   * @throws NumberFormatException if the `coalesce` value is not an integer
   */
  override protected def run(args: Array[String]): Int = {
    // `commParser`, `options`, `checkMustOption`, `printUsage` and `printOptions` come from
    // CommonSparkJob; NOTE(review): presumably `checkMustOption` verifies the "[must]"
    // options are present -- confirm against the base class.
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)

      val date = commandLine.getOptionValue("date")
      val output = commandLine.getOptionValue("output")
      // Parsed before the session is created, so a malformed value fails without starting Spark.
      val coalesce = Integer.parseInt(commandLine.getOptionValue("coalesce"))

      val spark = MobvistaConstant.createSparkSession(s"DmpUserInfo.${date}")
      try {
        // The SQL template carries an "@date" placeholder that is substituted textually.
        val sql = Constant.user_info.replace("@date", date)
        val df = spark.sql(sql)

        // `coalesce` is the number of partitions the result is repartitioned into before writing.
        df.repartition(coalesce)
          .write
          .mode(SaveMode.Overwrite)
          .option("orc.compress", "zlib")
          .orc(output)
      } finally {
        // Always release the session, including when the query or the write throws.
        spark.stop()
      }
      0
    }
  }
}

object UserInfo {

  /**
   * Entry point: runs the job and surfaces a failing status to the caller of the JVM.
   *
   * @param args raw command-line arguments, forwarded to `UserInfo.run`
   */
  def main(args: Array[String]): Unit = {
    val exitCode = new UserInfo().run(args)
    // Propagate a non-zero status so a usage error is not reported as a successful run.
    // A zero status returns normally instead of forcing an exit.
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}