#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : dm_realtime_service.sh
# @desc : real-time service data
# # # # # # # # # # # # # # # # # # # # # #

source ../dmp_env.sh

# Schedule time: prefer the ScheduleTime env var, fall back to the first CLI argument.
today=${ScheduleTime:-$1}

# Processing date is schedule time minus 1 day; partitions older than 10 days are unmounted.
date=$(date +"%Y%m%d" -d "-1 day $today")
unmount_date=$(date +"%Y%m%d" -d "-10 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
unmount_date_path=$(date +%Y/%m/%d -d "-10 day $today")

# Input datasets (paths rooted at env vars exported by dmp_env.sh).
ODS_DMP_USER_INFO_ALL_INPUT_PATH="${ODS_DMP_USER_INFO_ALL}_v2/${date}"
DM_DEVICE_TAG_STATISTICS_INPUT_PATH="${DM_DEVICE_TAG_STATISTICS_PATH}/${date_path}"

# Block until both upstream datasets have landed (_SUCCESS marker files).
check_await "${ODS_DMP_USER_INFO_ALL_INPUT_PATH}/_SUCCESS"
check_await "${DM_DEVICE_TAG_STATISTICS_INPUT_PATH}/_SUCCESS"

# Sleep 30s to avoid query anomalies caused by slow Hive metadata sync.
# NOTE(review): original comment said 120s but the code has always slept 30s;
# comment fixed to match the code.
sleep 30

output_path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info/${date_path}"
unmount_output_path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_user_info/${unmount_date_path}"

# export SPARK_HOME="/data/hadoop-home/engineplus-k8s-spark-3.0.0-hadoop3.2"
# export SPARK_CONF_DIR="/data/hadoop-config/command-home/engineplus-k8s-spark-3.0.0-online/conf"

# Run the Spark job; abort the whole pipeline if it fails.
if ! spark-submit --class mobvista.dmp.datasource.retargeting.DeviceInfoJob \
  --name "DeviceInfoJob.wangjf.${date}" \
  --conf spark.sql.shuffle.partitions=10000 \
  --conf spark.default.parallelism=10000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --conf spark.sql.files.maxPartitionBytes=536870912 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --master yarn --deploy-mode cluster \
  --executor-memory 12g --driver-memory 10g --executor-cores 4 --num-executors 150 \
  ../"${JAR}" \
  -date "${date}" -output "${output_path}" -coalesce 3000; then
  exit 255
fi

# Register the fresh partition and drop the 10-day-old one.
# These were previously unchecked; fail the job if Hive partition ops fail,
# consistent with the spark-submit error handling above.
mount_partition "dm_user_info" "dt='${date}'" "${output_path}" || exit 255
unmount_partition "dm_user_info" "dt='${unmount_date}'" "${unmount_output_path}" || exit 255