#!/bin/bash
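# Daily build of ods_dmp_user_info_all_v2: a Spark job writes the T-1 partition,
# the new partition is mounted in Hive, and the partition from 7 days ago is
# dropped. dmp_env.sh is assumed to export the path variables
# (ODS_DMP_USER_INFO_ALL, ODS_DMP_USER_INFO_DAILY, JAR) and the helpers
# check_await, mount_partition and unmount_partition used below.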

source ../dmp_env.sh
# QUEUE_NAME="dataplatform"

# Run date comes from the scheduler; fall back to the first CLI argument if ScheduleTime is not already set.
ScheduleTime=${ScheduleTime:-$1}
# Schedule date (yyyy-mm-dd), passed to the Spark job as -cur_day.
date=$(date +"%Y-%m-%d" -d "$ScheduleTime")

# Partition keys (yyyymmdd): T-1 is the partition being built, T-2 is the
# previous full snapshot that must already be complete (see check_await below).
yes_bef2_day=$(date -d "$ScheduleTime 2 days ago" +%Y%m%d)
yes_bef1_day=$(date -d "$ScheduleTime 1 days ago" +%Y%m%d)

OUTPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${yes_bef1_day}"

# Partition from 7 days ago, unmounted at the end of the run.
unmount_day=$(date -d "$ScheduleTime 7 days ago" +%Y%m%d)
UNMOUNT_OUTPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${unmount_day}"

date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")
INPUT_PUBLISH_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dev/realtime_service_device/${date_path}/*/*/*"

check_await ${ODS_DMP_USER_INFO_ALL}_v2/${yes_bef2_day}/_SUCCESS
check_await ${ODS_DMP_USER_INFO_DAILY}/${yes_bef1_day}/_SUCCESS
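# check_await (from dmp_env.sh) is assumed to block until the given _SUCCESS
# marker exists, so the job starts only after the T-2 snapshot and the T-1
# daily table are complete.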

# Clear any previous output for this partition so reruns start clean.
hadoop fs -rm -r ${OUTPUT_PATH}

# --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
# --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
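# Spark job that builds the T-1 partition; -cur_day/-output/-coalesce/-input
# are arguments consumed by the OdsDmpUserInfoAllV2 class.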
spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoAllV2 \
    --name "OdsDmpUserInfoAllV2.${yes_bef1_day}" \
    --conf spark.sql.shuffle.partitions=12000 \
    --conf spark.default.parallelism=2000 \
    --conf spark.sql.files.maxPartitionBytes=268435456 \
    --conf spark.storage.memoryFraction=0.4 \
    --conf spark.shuffle.memoryFraction=0.4 \
    --conf spark.sql.adaptive.enabled=true \
    --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \
    --master yarn \
    --deploy-mode cluster \
    --executor-memory 12G \
    --driver-memory 6G \
    --executor-cores 5 \
    --num-executors 100 \
    ../${JAR} \
    -cur_day ${date} -output ${OUTPUT_PATH} -coalesce 3000 -input ${INPUT_PUBLISH_PATH}
if [[ $? -ne 0 ]]; then
    exit 255
fi

mount_partition "ods_dmp_user_info_all_v2" "dt='${yes_bef1_day}'" "${OUTPUT_PATH}"
if [[ $? -ne 0 ]]; then
    exit 255
fi

unmount_partition "ods_dmp_user_info_all_v2" "dt='${unmount_day}'" "${UNMOUNT_OUTPUT_PATH}"
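# Note: the exit status of unmount_partition is not checked, so a failed
# cleanup of the 7-day-old partition does not fail the job.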