dm_realtime_service.sh 1.97 KB
#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file    :dm_realtime_service.sh
# @desc    :Real-time service data (runs DeviceInfoJob and mounts the dm_user_info partition)
# # # # # # # # # # # # # # # # # # # # # #

source ../dmp_env.sh

# Reference day: ScheduleTime from the environment takes precedence, otherwise
# the first positional argument is used. When both are empty, the relative
# offsets below are resolved by `date` against the current time.
today=${ScheduleTime:-$1}

# Derive, from the reference day:
#   date / date_path                 -> reference day - 1  (YYYYMMDD and YYYY/MM/DD)
#   unmount_date / unmount_date_path -> reference day - 10 (YYYYMMDD and YYYY/MM/DD)
# Abort if any conversion fails: an empty stamp would otherwise flow silently
# into every input path, output path and partition spec built from it.
if ! {
    date=$(date +"%Y%m%d" -d "-1 day $today") &&
    unmount_date=$(date +"%Y%m%d" -d "-10 day $today") &&
    date_path=$(date +%Y/%m/%d -d "-1 day $today") &&
    unmount_date_path=$(date +%Y/%m/%d -d "-10 day $today")
}; then
    echo "cannot derive processing dates from reference day '${today}'" >&2
    exit 1
fi

# Upstream inputs for the data day. The user-info location is keyed by the
# compact date stamp (under the "_v2" sibling of ODS_DMP_USER_INFO_ALL), the
# device tag statistics location by the YYYY/MM/DD path fragment.
ODS_DMP_USER_INFO_ALL_INPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${date}"
DM_DEVICE_TAG_STATISTICS_INPUT_PATH="${DM_DEVICE_TAG_STATISTICS_PATH}/${date_path}"

# Hand each input's _SUCCESS marker to check_await, user-info first.
# check_await is defined outside this file; presumably it blocks until the
# marker exists — verify against dmp_env.sh.
for input_dir in \
    "${ODS_DMP_USER_INFO_ALL_INPUT_PATH}" \
    "${DM_DEVICE_TAG_STATISTICS_INPUT_PATH}"; do
    check_await "${input_dir}/_SUCCESS"
done

# Pause for 30 seconds so that slow Hive metadata synchronisation does not
# cause query errors in the steps that follow.
# NOTE(review): this comment previously said 120s while the code sleeps 30s —
# confirm which duration is intended.
sleep 30

# Both locations sit under the same dm_user_info root and differ only in the
# trailing YYYY/MM/DD fragment: one for the data day, one for the day whose
# partition is unmounted at the end of the script.
dm_user_info_root="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info"
output_path="${dm_user_info_root}/${date_path}"
unmount_output_path="${dm_user_info_root}/${unmount_date_path}"

# export SPARK_HOME="/data/hadoop-home/engineplus-k8s-spark-3.0.0-hadoop3.2"

# export SPARK_CONF_DIR="/data/hadoop-config/command-home/engineplus-k8s-spark-3.0.0-online/conf"

# Submit mobvista.dmp.datasource.retargeting.DeviceInfoJob to YARN in cluster
# mode for the data day.
#   Reads : JAR (resolved relative to the parent directory), date, output_path
#   Exits : 255 when spark-submit returns a non-zero status
# The arguments live in an array and every expansion is quoted, so a value
# containing whitespace or glob characters still reaches spark-submit as a
# single argument.
spark_submit_args=(
    --class mobvista.dmp.datasource.retargeting.DeviceInfoJob
    --name "DeviceInfoJob.wangjf.${date}"

    # Shuffle / parallelism settings.
    --conf spark.sql.shuffle.partitions=10000
    --conf spark.default.parallelism=10000

    # Kryo serializer buffers.
    --conf spark.kryoserializer.buffer.max=512m
    --conf spark.kryoserializer.buffer=64m

    # 536870912 bytes = 512 MiB, used for both the file-read split size and the
    # adaptive-execution advisory partition size.
    --conf spark.sql.files.maxPartitionBytes=536870912
    --conf spark.sql.adaptive.enabled=true
    --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912

    # Cluster resources.
    --master yarn
    --deploy-mode cluster
    --executor-memory 12g
    --driver-memory 10g
    --executor-cores 4
    --num-executors 150

    # Application jar followed by the job's own arguments.
    "../${JAR}"
    -date "${date}"
    -output "${output_path}"
    -coalesce 3000
)

# Test the command directly instead of inspecting $? afterwards, so no
# statement can slip in between and clobber the status.
if ! spark-submit "${spark_submit_args[@]}"; then
    echo "DeviceInfoJob failed for ${date}" >&2
    exit 255
fi

# Register the data day's output as partition dt='<date>' of dm_user_info.
# mount_partition is defined outside this file (presumably in dmp_env.sh).
# Stop with the same exit code as the Spark step when it reports failure;
# otherwise a successful unmount below would make the whole script exit 0 even
# though the new partition was never mounted.
if ! mount_partition "dm_user_info" "dt='${date}'" "${output_path}"; then
    echo "mount_partition failed for dm_user_info dt='${date}'" >&2
    exit 255
fi

# Unmount the partition for the reference day minus 10. This stays the last
# command, so its status is the script's exit status.
unmount_partition "dm_user_info" "dt='${unmount_date}'" "${unmount_output_path}"