#!/bin/bash
# dm_realtime_service_region.sh

# # # # # # # # # # # # # # # # # # # # # #
# @file    :dm_realtime_service_region.sh
# # # # # # # # # # # # # # # # # # # # # #

source ../dmp_env.sh

# Number of days of history the Spark job should process (required, $1).
days=$1

# Schedule date (yyyymmdd): prefer the ScheduleTime env var, fall back to $2.
today=${ScheduleTime:-$2}
# Data date is T-1; unmount date is T-10 (used by the disabled unmount step below).
date=$(date +"%Y%m%d" -d "-1 day ${today}")
date_path=$(date +%Y/%m/%d -d "-1 day ${today}")
unmount_date=$(date +"%Y%m%d" -d "-10 day ${today}")
unmount_date_path=$(date +%Y/%m/%d -d "-10 day ${today}")

# Upstream partition produced by the dm_user_info job. (Renamed from the
# original misspelling "INTPUT".)
INPUT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info/${date_path}"

# Block until the upstream partition is complete (_SUCCESS marker present).
check_await "${INPUT}/_SUCCESS"

# sleep 120s to avoid query anomalies caused by slow Hive metadata sync
# (original comment said 300s but the code sleeps 120 — kept the code's value).
sleep 120

output_path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info_region/${date_path}"
unmount_output_path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info_region/${unmount_date_path}"

# Launch the Spark job that derives dm_user_info_region from dm_user_info.
# Exit 255 immediately if the job fails so the scheduler marks the run failed.
spark-submit --class mobvista.dmp.datasource.retargeting.UserFeatureFilterJob \
     --name "mobvista.dmp.datasource.retargeting.UserFeatureFilterJob_wangjf_${date}" \
     --conf spark.sql.shuffle.partitions=2000 \
     --conf spark.default.parallelism=2000 \
     --conf spark.sql.broadcastTimeout=1200 \
     --conf spark.kryoserializer.buffer.max=256m \
     --conf spark.yarn.executor.memoryOverhead=2048 \
     --conf spark.sql.files.maxPartitionBytes=268435456 \
     --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
     --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
     --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g  --executor-cores 3 --num-executors 100 \
     ../"${JAR}" \
     -date "${date}" -output "${output_path}" -coalesce 1000 -days "${days}" \
  || exit 255

# NOTE(review): mount/unmount steps are disabled. The table names below
# (dm_user_info / dm_user_info_v2) do not match this job's output table
# dm_user_info_region — confirm the intended table before re-enabling.
# (A duplicate mount_partition line was removed.)
# mount_partition "dm_user_info" "dt='${date}'" "${output_path}"

# unmount_partition "dm_user_info_v2" "dt='${unmount_date}'" "${unmount_output_path}"