#!/bin/bash
# Daily job for ods_dmp_user_info_all_v2: wait for upstream data, run the
# OdsDmpUserInfoAllV2 Spark job, mount the new partition and unmount the
# partition from 7 days ago. Helper functions (check_await, mount_partition,
# unmount_partition) are expected to come from dmp_env.sh.

source ../dmp_env.sh

# QUEUE_NAME="dataplatform"
# ScheduleTime=${ScheduleTime:-$1}

# Schedule date and derived date strings (GNU date).
date=$(date +"%Y-%m-%d" -d "$ScheduleTime")

yes_bef2_day=$(date -d "$ScheduleTime 2 days ago" +%Y%m%d)   # two days before ScheduleTime
yes_bef1_day=$(date -d "$ScheduleTime 1 days ago" +%Y%m%d)   # one day before ScheduleTime

OUTPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${yes_bef1_day}"

unmount_day=$(date -d "$ScheduleTime 7 days ago" +%Y%m%d)    # partition to be unmounted
UNMOUNT_OUTPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${unmount_day}"

date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")
INPUT_PUBLISH_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/realtime_service_device/${date_path}/*/*/*"

# Wait for the upstream _SUCCESS flags before starting.
check_await "${ODS_DMP_USER_INFO_ALL}_v2/${yes_bef2_day}/_SUCCESS"
check_await "${ODS_DMP_USER_INFO_DAILY}/${yes_bef1_day}/_SUCCESS"

# Clear any previous output so the job can be rerun cleanly.
hadoop fs -rmr "${OUTPUT_PATH}"

# --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
# --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
# Note: spark.storage.memoryFraction / spark.shuffle.memoryFraction are legacy
# settings, ignored by Spark 1.6+ unless spark.memory.useLegacyMode=true.
spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoAllV2 \
  --name "OdsDmpUserInfoAllV2.${yes_bef1_day}" \
  --conf spark.sql.shuffle.partitions=12000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.sql.files.maxPartitionBytes=268435456 \
  --conf spark.storage.memoryFraction=0.4 \
  --conf spark.shuffle.memoryFraction=0.4 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \
  --files ${HIVE_SITE_PATH} \
  --master yarn \
  --deploy-mode cluster \
  --executor-memory 12G \
  --driver-memory 6G \
  --executor-cores 5 \
  --num-executors 100 \
  ../${JAR} \
  -cur_day ${date} -output ${OUTPUT_PATH} -coalesce 3000 -input ${INPUT_PUBLISH_PATH}

if [[ $? -ne 0 ]]; then
  exit 255
fi

# Register the new partition, then drop the 7-day-old one.
mount_partition "ods_dmp_user_info_all_v2" "dt='${yes_bef1_day}'" "${OUTPUT_PATH}"

if [[ $? -ne 0 ]]; then
  exit 255
fi

unmount_partition "ods_dmp_user_info_all_v2" "dt='${unmount_day}'" "${UNMOUNT_OUTPUT_PATH}"
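
# Example invocation (illustrative only; the actual scheduler wiring and script
# name may differ). ScheduleTime is expected in the environment, e.g.:
#   ScheduleTime="2024-01-15" bash ods_dmp_user_info_all_v2.sh
# With GNU date, the derived values would then be:
#   yes_bef2_day=20240113, yes_bef1_day=20240114, unmount_day=20240108,
#   date_path=2024/01/14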