#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : dm_realtime_service_region.sh
# Purpose: daily job that waits for the upstream dm_user_info partition,
#          then runs the Spark UserFeatureFilterJob to produce the
#          dm_user_info_region dataset for yesterday's partition.
# Usage  : dm_realtime_service_region.sh <days> [schedule_time]
#          $1 - "days" argument forwarded to the Spark job
#          $2 - fallback schedule time when $ScheduleTime env var is unset
# # # # # # # # # # # # # # # # # # # # # #

# Shared env: defines check_await, mount_partition/unmount_partition and $JAR.
source ../dmp_env.sh

days=$1
# Prefer the scheduler-provided time; fall back to the CLI argument.
today=${ScheduleTime:-$2}

# Target partition is "yesterday" relative to the schedule time.
date=$(date +"%Y%m%d" -d "-1 day ${today}")
date_path=$(date +%Y/%m/%d -d "-1 day ${today}")
# 10-day-old partition, reserved for the (currently disabled) unmount step below.
unmount_date=$(date +"%Y%m%d" -d "-10 day ${today}")
unmount_date_path=$(date +%Y/%m/%d -d "-10 day ${today}")

INPUT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info/${date_path}"

# Block until the upstream partition's _SUCCESS marker exists.
check_await "${INPUT}/_SUCCESS"

# Extra wait (120s) to avoid query errors caused by slow Hive metadata sync.
sleep 120

output_path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info_region/${date_path}"
unmount_output_path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info_region/${unmount_date_path}"

# Fail the whole job (exit 255) if the Spark application fails.
spark-submit --class mobvista.dmp.datasource.retargeting.UserFeatureFilterJob \
  --name "mobvista.dmp.datasource.retargeting.UserFeatureFilterJob_wangjf_${date}" \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.sql.broadcastTimeout=1200 \
  --conf spark.kryoserializer.buffer.max=256m \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.sql.files.maxPartitionBytes=268435456 \
  --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --master yarn --deploy-mode cluster \
  --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 100 \
  ../"${JAR}" \
  -date "${date}" -output "${output_path}" -coalesce 1000 -days "${days}" || exit 255

# Partition (un)mounting is currently disabled; kept for reference.
# mount_partition "dm_user_info" "dt='${date}'" "${output_path}"
# mount_partition "dm_user_info" "dt='${date}'" "${output_path}"
# unmount_partition "dm_user_info_v2" "dt='${unmount_date}'" "${unmount_output_path}"