package mobvista.dmp.common

import org.apache.commons.cli.{BasicParser, CommandLine, HelpFormatter, Option, Options}
import org.apache.commons.lang.StringUtils

import scala.collection.JavaConversions._

/**
  * Template base class for Spark applications.
  *
  * Provides shared constants (device-id regular expressions, field separator),
  * command-line option declaration / validation / printing helpers, and a
  * line-splitting helper. Concrete jobs implement [[run]].
  *
  * NOTE: inside this file `Option` refers to `org.apache.commons.cli.Option`,
  * not `scala.Option`, because of the import above.
  */
abstract class CommonSparkJob {

  val QUERY: String = "QUERY"
  val ENCODING: String = "UTF-8"
  val HTTPPREFIX: String = "http://test.com"

  /** Default field separator used by [[splitFun]] when no separator is supplied. */
  val DATA_SPLIT: String = "\t"

  // An OAID can take one of two forms, matched by didPtn and oaidAnotherPtn;
  // see https://www.h5w3.com/33786.html

  /** 8-4-4-4-12 hexadecimal groups separated by '-'. */
  val didPtn: String = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$"

  /** 15 to 17 decimal digits. */
  val imeiPtn: String = "^([0-9]{15,17})$"

  /** Exactly 32 hexadecimal characters. */
  val imeiMd5Ptn: String = "^([a-fA-F0-9]{32})$"

  /** Exactly 16 ASCII letters or digits. */
  val andriodIdPtn: String = "^[a-zA-Z0-9]{16}$"

  /** 1 to 64 hexadecimal characters. */
  val oaidAnotherPtn: String = "^([a-fA-F0-9]{1,64})$"

  /**
    * One ASCII letter or digit followed by 30 to 32 repetitions of that same
    * character; the pattern has no end anchor.
    * NOTE(review): presumably meant to detect degenerate single-character
    * "MD5" values rather than valid MD5 strings - confirm against callers.
    */
  val md5Ptn: String = """^([0-9a-zA-Z])\1{30,32}"""

  /** Device id made up entirely of zeros. */
  val allZero: String = "00000000-0000-0000-0000-000000000000"

  /**
    * Options accepted by this job. Built during construction by calling the
    * overridable [[buildOptions]], so an override of [[buildOptions]] runs
    * before the subclass's own fields have been initialised.
    */
  val options: Options = buildOptions()

  val commParser: BasicParser = new BasicParser

  /**
    * Entry point implemented by concrete jobs.
    *
    * @param args raw command-line arguments
    * @return an integer status code whose meaning is defined by the implementation
    */
  protected def run(args: Array[String]): Int

  /**
    * Checks that every option whose description contains "[must]" has a
    * non-empty value in `commands`.
    *
    * The first mandatory option found without a value is reported on stdout by
    * its option name. Options without a description are treated as optional.
    *
    * @param commands the parsed command line
    * @return `true` when all mandatory options are set, `false` otherwise
    */
  protected def checkMustOption(commands: CommandLine): Boolean = {
    val firstMissing = options.getOptions
      .map(_.asInstanceOf[Option])
      .find { option =>
        // StringUtils.contains is null-safe, so a missing description is simply "not mandatory".
        StringUtils.contains(option.getDescription, "[must]") &&
          StringUtils.isEmpty(commands.getOptionValue(option.getOpt))
      }

    // Report the option name: the arg name is never set by addOption(opt, hasArg, description).
    firstMissing.foreach(option => println("Please set paramter " + option.getOpt))
    firstMissing.isEmpty
  }

  /**
    * Declares the options common to all jobs: `input` and `output` (both
    * marked "[must]"), `parallelism` and `coalesce`.
    *
    * @return a freshly built option set
    */
  protected def buildOptions(): Options = {
    val opts = new Options
    opts.addOption("input", true, "[must] input path")
    opts.addOption("output", true, "[must] output path")
    opts.addOption("parallelism", true, "parallelism of shuffle operation")
    opts.addOption("coalesce", true, "number of output files")
    opts
  }

  /**
    * Prints every declared option together with the value it has in
    * `commandLine`, framed by two lines of asterisks.
    *
    * @param commandLine the parsed command line
    */
  protected def printOptions(commandLine: CommandLine): Unit = {
    val border = "****************************************"
    println(border)
    options.getOptions.foreach { declared =>
      val opt = declared.asInstanceOf[Option].getOpt
      println(s"* $opt = ${commandLine.getOptionValue(opt)}")
    }
    println(border)
  }

  /**
    * Prints a usage message for the given options, using this class's simple
    * name as the command name.
    *
    * @param options the options to describe (shadows the `options` member)
    */
  protected def printUsage(options: Options): Unit =
    new HelpFormatter().printHelp(this.getClass.getSimpleName, options)

  /**
    * Splits `line` with `StringUtils.splitPreserveAllTokens`, with no limit on
    * the number of tokens.
    *
    * @param line  the text to split
    * @param split the separator; when `null`, [[DATA_SPLIT]] is used instead.
    *              NOTE(review): Commons Lang documents this argument as a set
    *              of separator characters, not a multi-character delimiter -
    *              confirm for callers passing more than one character.
    * @return the tokens produced by `splitPreserveAllTokens`
    */
  def splitFun(line: String, split: String): Array[String] = {
    val separator = if (split == null) DATA_SPLIT else split
    StringUtils.splitPreserveAllTokens(line, separator, -1)
  }

  /**
    * Splits `line` on the default separator [[DATA_SPLIT]].
    *
    * @param line the text to split
    * @return the tokens produced by `splitFun(line, null)`
    */
  def splitFun(line: String): Array[String] = splitFun(line, null)
}