Commit f6e0e5df by fan.jiang

id3876827262021090301 com.xunmeng.pinduoduo to rtdmp s3

parent 24ba3ada
......@@ -3,12 +3,44 @@ source ../../dmp_env.sh
# Date stamps derived from $ScheduleTime. dt_today, dt_slash_today and dt_day are
# all "1 days ago" relative to $ScheduleTime; dt_slash_day is $ScheduleTime itself.
# NOTE(review): dt_day (yyyymmdd) is one day earlier than dt_slash_day (yyyy/mm/dd),
# so the TO readiness check below and the -dt_day job argument refer to different
# days — confirm this offset is intended.
dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt_slash_today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
dt_day=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt_slash_day=$(date -d "$ScheduleTime" +"%Y/%m/%d")
# check_await is not defined in this chunk (presumably it comes from the sourced
# dmp_env.sh and blocks until the path exists — TODO confirm). It is given the
# _SUCCESS marker of each install-list partition: dsp_req and btop for
# dt_slash_today, TO for dt_slash_day.
check_await "${DM_INSTALL_LIST}_v2/${dt_slash_today}/dsp_req/_SUCCESS"
check_await "${DM_INSTALL_LIST}_v2/${dt_slash_today}/btop/_SUCCESS"
check_await "${DM_INSTALL_LIST}_v2/${dt_slash_day}/TO/_SUCCESS"
# One output directory per exported package/audience, all under
# RTDMP_TMP_PACKAGE_NAME_PATH and partitioned by dt_slash_today. The number in
# OUTPUT_PATH<n> matches the -output<n> argument passed to the Spark job.
OUTPUT_PATH1="${RTDMP_TMP_PACKAGE_NAME_PATH}/id1142110895/${dt_slash_today}/"
OUTPUT_PATH2="${RTDMP_TMP_PACKAGE_NAME_PATH}/id3876827262021090301/${dt_slash_today}/"
OUTPUT_PATH3="${RTDMP_TMP_PACKAGE_NAME_PATH}/id3332062892021090301/${dt_slash_today}/"
OUTPUT_PATH4="${RTDMP_TMP_PACKAGE_NAME_PATH}/id13403763232021090301/${dt_slash_today}/"
OUTPUT_PATH5="${RTDMP_TMP_PACKAGE_NAME_PATH}/id10442830592021090301/${dt_slash_today}/"
OUTPUT_PATH6="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_bes/${dt_slash_today}/"
OUTPUT_PATH7="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_ss_android_ugc_aweme_iqiyi/${dt_slash_today}/"
OUTPUT_PATH8="${RTDMP_TMP_PACKAGE_NAME_PATH}/id11421108952021090302/${dt_slash_today}/"
OUTPUT_PATH9="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_oppoziyou/${dt_slash_today}/"
OUTPUT_PATH10="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_oppoziyou_notinstall/${dt_slash_today}/"
OUTPUT_PATH11="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_oppoziyou_hist_notinstall/${dt_slash_today}/"
OUTPUT_PATH12="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_oppolianmeng/${dt_slash_today}/"
OUTPUT_PATH13="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_oppolianmeng_hist1year_notinstall/${dt_slash_today}/"
OUTPUT_PATH14="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo_oppolianmeng_histhalfyear_notinstall/${dt_slash_today}/"
OUTPUT_PATH15="${RTDMP_TMP_PACKAGE_NAME_PATH}/com_xunmeng_pinduoduo/${dt_slash_today}/"
# Recursively remove each output directory, one `hadoop fs -rm -r` call per
# directory, in the order OUTPUT_PATH1 .. OUTPUT_PATH15.
for stale_output in \
    "${OUTPUT_PATH1}" "${OUTPUT_PATH2}" "${OUTPUT_PATH3}" "${OUTPUT_PATH4}" "${OUTPUT_PATH5}" \
    "${OUTPUT_PATH6}" "${OUTPUT_PATH7}" "${OUTPUT_PATH8}" "${OUTPUT_PATH9}" "${OUTPUT_PATH10}" \
    "${OUTPUT_PATH11}" "${OUTPUT_PATH12}" "${OUTPUT_PATH13}" "${OUTPUT_PATH14}" "${OUTPUT_PATH15}"
do
  hadoop fs -rm -r "${stale_output}"
done
spark-submit --class mobvista.dmp.datasource.dm.RtdmpTmpId1142110895 \
......@@ -18,9 +50,14 @@ spark-submit --class mobvista.dmp.datasource.dm.RtdmpTmpId1142110895 \
--conf spark.driver.maxResultSize=4g \
--conf spark.network.timeout=720s \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 70 \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 130 \
../../${JAR} -dt_today ${dt_today} -output1 ${OUTPUT_PATH1} \
-coalesce 420
-dt_day ${dt_day} \
-output2 ${OUTPUT_PATH2} -output3 ${OUTPUT_PATH3} -output4 ${OUTPUT_PATH4} -output5 ${OUTPUT_PATH5} \
-output6 ${OUTPUT_PATH6} -output7 ${OUTPUT_PATH7} -output8 ${OUTPUT_PATH8} -output9 ${OUTPUT_PATH9} \
-output10 ${OUTPUT_PATH10} -output11 ${OUTPUT_PATH11} -output12 ${OUTPUT_PATH12} -output13 ${OUTPUT_PATH13} \
-output14 ${OUTPUT_PATH14} -output15 ${OUTPUT_PATH15} \
-coalesce 780
if [[ $? -ne 0 ]]; then
......
......@@ -21,7 +21,22 @@ class RtdmpTmpId1142110895 extends CommonSparkJob with Serializable {
val options = new Options
// Option names in registration order: coalesce, output1 .. output15, dt_today, dt_day.
val optionNames = Seq("coalesce") ++ (1 to 15).map(i => s"output${i}") ++ Seq("dt_today", "dt_day")
// Each option is registered with hasArg = true and the description "[must] <name>".
optionNames.foreach { optionName =>
  options.addOption(optionName, true, s"[must] ${optionName}")
}
options
}
......@@ -34,7 +49,22 @@ class RtdmpTmpId1142110895 extends CommonSparkJob with Serializable {
// Raw string values of the job arguments, read from the parsed command line.
// `coalesce` stays a String here; it is converted with `.toInt` at each use.
val coalesce = commandLine.getOptionValue("coalesce")
// output1 .. output15: one target directory per exported query (sql<n> is
// written to output<n>).
val output1 = commandLine.getOptionValue("output1")
val output2 = commandLine.getOptionValue("output2")
val output3 = commandLine.getOptionValue("output3")
val output4 = commandLine.getOptionValue("output4")
val output5 = commandLine.getOptionValue("output5")
val output6 = commandLine.getOptionValue("output6")
val output7 = commandLine.getOptionValue("output7")
val output8 = commandLine.getOptionValue("output8")
val output9 = commandLine.getOptionValue("output9")
val output10 = commandLine.getOptionValue("output10")
val output11 = commandLine.getOptionValue("output11")
val output12 = commandLine.getOptionValue("output12")
val output13 = commandLine.getOptionValue("output13")
val output14 = commandLine.getOptionValue("output14")
val output15 = commandLine.getOptionValue("output15")
// Partition values substituted into the `dt='...'` predicates of the queries:
// dt_today is used by every query, dt_day only by the 'TO' branch of sql15.
val dt_today = commandLine.getOptionValue("dt_today")
val dt_day = commandLine.getOptionValue("dt_day")
val spark = SparkSession.builder()
.appName("RtdmpTmpId1142110895")
......@@ -50,6 +80,20 @@ class RtdmpTmpId1142110895 extends CommonSparkJob with Serializable {
import spark.implicits._
// Recursively delete each output directory (output1 .. output15, in that order)
// through the file system resolved for the s3://mob-emr-test URI.
Seq(
  output1, output2, output3, output4, output5,
  output6, output7, output8, output9, output10,
  output11, output12, output13, output14, output15
).foreach { outputDir =>
  FileSystem.get(new URI(s"s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration)
    .delete(new Path(outputDir), true)
}
try {
val sql1=
......@@ -60,6 +104,118 @@ class RtdmpTmpId1142110895 extends CommonSparkJob with Serializable {
spark.sql(sql1).rdd.flatMap(buildRes(_)).coalesce(coalesce.toInt)
.saveAsNewAPIHadoopFile(s"${output1}", classOf[Text], classOf[Text], classOf[TextMultipleOutputFormat], spark.sparkContext.hadoopConfiguration)
// Builds the device query for the dsp_req business line of the dt_today
// partition, restricted to the given package names (rendered as a quoted,
// comma-separated IN list).
def dspReqQuery(packageNames: String*): String = {
  val inList = packageNames.map(packageName => s"'${packageName}'").mkString(",")
  s"""
     |select device_id, device_type from dwh.dm_install_list_v2 where dt='${dt_today}' and package_name in (${inList}) and business='dsp_req'
  """.stripMargin
}

// The only export that is not dsp_req: com.xunmeng.pinduoduo devices from the
// btop business line (dt_today partition) or the TO business line (dt_day partition).
val sql15 =
  s"""
     |select device_id, device_type from dwh.dm_install_list_v2 where ( (dt='${dt_today}' and business in ('btop')) or (dt='${dt_day}' and business in ('TO')) ) and package_name in ('com.xunmeng.pinduoduo')
  """.stripMargin

// (query, output directory) pairs, in the order they are executed.
val exports: Seq[(String, String)] = Seq(
  dspReqQuery("3876827262021090301", "id3876827262021090301") -> output2,
  dspReqQuery("3332062892021090301", "id3332062892021090301") -> output3,
  dspReqQuery("13403763232021090301", "id13403763232021090301") -> output4,
  dspReqQuery("10442830592021090301", "id10442830592021090301") -> output5,
  dspReqQuery("com.xunmeng.pinduoduo_bes") -> output6,
  dspReqQuery("com.ss.android.ugc.aweme_iqiyi") -> output7,
  dspReqQuery("11421108952021090302", "id11421108952021090302") -> output8,
  dspReqQuery("com.xunmeng.pinduoduo_oppoziyou") -> output9,
  dspReqQuery("com.xunmeng.pinduoduo_oppoziyou_notinstall") -> output10,
  dspReqQuery("com.xunmeng.pinduoduo_oppoziyou_hist_notinstall") -> output11,
  dspReqQuery("com.xunmeng.pinduoduo_oppolianmeng") -> output12,
  dspReqQuery("com.xunmeng.pinduoduo_oppolianmeng_hist1year_notinstall") -> output13,
  dspReqQuery("com.xunmeng.pinduoduo_oppolianmeng_histhalfyear_notinstall") -> output14,
  sql15 -> output15
)

// Parse the partition count once instead of once per export.
val partitionCount = coalesce.toInt

// Run the exports one after another: query, expand each row with buildRes,
// coalesce to the requested partition count and write with TextMultipleOutputFormat.
exports.foreach { case (query, outputDir) =>
  spark.sql(query).rdd.flatMap(buildRes(_)).coalesce(partitionCount)
    .saveAsNewAPIHadoopFile(outputDir, classOf[Text], classOf[Text], classOf[TextMultipleOutputFormat], spark.sparkContext.hadoopConfiguration)
}
} finally {
spark.stop()
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment