#!/bin/bash
# rtdmp_as_v2.sh

source ../dmp_env.sh
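# dmp_env.sh is assumed to export ScheduleTime (the scheduled run time used below)
# and JAR (the path of the DMP job jar passed to spark-submit).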

today=${ScheduleTime}

date_time=$(date +"%Y-%m-%d.%H" -d "-1 hour $today")

date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today")
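# Both values refer to the previous hour. Illustrative example: with
# ScheduleTime="2021-01-01 12:00:00", date_time=2021-01-01.11 and date_path=2021/01/01/11.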

INPUT_DATA="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}"

part_num=$(hadoop fs -ls ${INPUT_DATA} | wc -l)
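# 'hadoop fs -ls' prints a "Found N items" header line before the listing, so ten
# lines or fewer means this hour's input is (nearly) empty; use a minimal Spark
# resource profile in that case, otherwise the full-size profile.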
if [[ ${part_num} -le 10 ]]; then
  echo "This Dir No Data !!!"
  partition=10
  coalesce=10
  executor=2
  memory=4
  core=2
else
  partition=2000
  coalesce=200
  executor=50
  memory=10
  core=3
fi

OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${date_path}"
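# Submit the RTDmpASV2 job. The -coalesce argument presumably caps the number of
# output files and -time tags the run with the processed hour; the enlarged Kryo
# buffers avoid serializer buffer-overflow errors on large records.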

spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpASV2 \
  --name "RTDmpASV2.${date_time}" \
  --conf spark.sql.shuffle.partitions=${partition} \
  --conf spark.default.parallelism=${partition} \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --master yarn --deploy-mode cluster --executor-memory ${memory}g --driver-memory 8g --executor-cores ${core} --num-executors ${executor} \
  ../${JAR} -input_data ${INPUT_DATA} -output ${OUTPUT} -coalesce ${coalesce} -time "${date_time}"
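# In YARN cluster mode spark-submit waits for the application by default and
# returns non-zero if it fails, so the check below aborts the script (exit 255
# signals failure to the calling scheduler).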

if [[ $? -ne 0 ]]; then
  exit 255
fi

ads="foractivation adx normal"
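# Output categories produced by the job; "foractivation", "adx" and "normal" each
# appear to get their own subdirectory under the output path.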

# 2020-11-20 14:45:52 Removed the per-region output logic

: '
res="cn virginia seoul tokyo frankfurt singapore"

for ad in ${ads}
do
    for re in ${res}
    do
        if hadoop fs -ls "$OUTPUT/data/${ad}/$re" > /dev/null 2>&1
        then
            hadoop fs -touchz ${OUTPUT}/data/${ad}/${re}/_SUCCESS
        else
            hadoop fs -mkdir -p ${OUTPUT}/data/${ad}/${re}
            hadoop fs -touchz ${OUTPUT}/data/${ad}/${re}/_SUCCESS
        fi
    done
    hadoop fs -touchz ${OUTPUT}/data/${ad}/_SUCCESS
done

if [[ $? -ne 0 ]]; then
    exit 255
fi
'
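# Touch per-category _SUCCESS markers under both data_v2 and data, creating the
# directory first when the job wrote nothing for that category, so downstream
# consumers can detect completion even for empty categories.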

for ad in ${ads}; do
  if hadoop fs -ls "$OUTPUT/data_v2/${ad}" >/dev/null 2>&1; then
    hadoop fs -touchz ${OUTPUT}/data_v2/${ad}/_SUCCESS
  else
    hadoop fs -mkdir -p ${OUTPUT}/data_v2/${ad}
    hadoop fs -touchz ${OUTPUT}/data_v2/${ad}/_SUCCESS
  fi
  if hadoop fs -ls "$OUTPUT/data/${ad}" >/dev/null 2>&1; then
    hadoop fs -touchz ${OUTPUT}/data/${ad}/_SUCCESS
  else
    hadoop fs -mkdir -p ${OUTPUT}/data/${ad}
    hadoop fs -touchz ${OUTPUT}/data/${ad}/_SUCCESS
  fi
done

hadoop fs -touchz ${OUTPUT}/_SUCCESS

if [[ $? -ne 0 ]]; then
  exit 255
fi
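# Retention: remove rtdmp and rtdmp_deal output older than 168 hours (7 days).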

expire_date_path=$(date +%Y/%m/%d/%H -d "-168 hour $today")

EXPIRE_RTDMP_OUTPUT_PATH="s3://mob-emr-test/dataplatform/rtdmp/${expire_date_path}"

if hadoop fs -ls "$EXPIRE_RTDMP_OUTPUT_PATH" >/dev/null 2>&1; then
  hadoop fs -rm -r ${EXPIRE_RTDMP_OUTPUT_PATH}
fi

EXPIRE_RTDMP_DEAL_OUTPUT_PATH="s3://mob-emr-test/dataplatform/rtdmp_deal/${expire_date_path}"
if hadoop fs -ls "$EXPIRE_RTDMP_DEAL_OUTPUT_PATH" >/dev/null 2>&1; then
  hadoop fs -rm -r ${EXPIRE_RTDMP_DEAL_OUTPUT_PATH}
fi