id_mapping.sh 1.75 KB
#! /bin/bash

source ../dmp_env.sh

# Positional arguments:
#   $1 - country; 'cn' selects the cn sizing, any other value the non-cn sizing
#   $2 - platform; 'android' is special-cased, any other value uses the fallback
# Both values are embedded in input/output paths and passed to the Spark job,
# so abort early when either is missing instead of continuing with malformed
# paths. 255 is the same status this script uses when the Spark job fails.
if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  printf 'Usage: %s <country> <platform>\n' "${0##*/}" >&2
  exit 255
fi

COUNTRY=$1

PLATFORM=$2

# Spark sizing for this run, selected by COUNTRY and then PLATFORM.
#   partition - used for spark.sql.shuffle.partitions and spark.default.parallelism
#   executors - passed as --num-executors
#   cores     - passed as --executor-cores
#   coalesce  - passed to the job as -coalesce
# Every combination uses 3 cores per executor.
cores=3

case "${COUNTRY}" in
  cn)
    # cn uses the same sizing for every platform.
    partition=1000
    executors=100
    coalesce=500
    ;;
  *)
    case "${PLATFORM}" in
      android)
        # Non-cn android gets the largest allocation.
        partition=10000
        executors=300
        coalesce=3000
        ;;
      *)
        partition=2000
        executors=100
        coalesce=500
        ;;
    esac
    ;;
esac

# ScheduleTime is not set in this file; presumably it comes from the scheduler
# or from dmp_env.sh -- verify against the caller.

# Day before ScheduleTime, formatted as YYYYMMDD (job name and -date argument).
LOG_TIME="$(date +%Y%m%d -d "-1 day ${ScheduleTime}")"

# Same day, formatted as a YYYY/MM/DD path fragment.
date_path="$(date +'%Y/%m/%d' -d "-1 day ${ScheduleTime}")"

ADN_REQUEST_INPUT_PATH="${ID_MAPPING}/${date_path}/adn_request"

DSP_INPUT_PATH="${ID_MAPPING}/${date_path}/dsp_req"

# Gate on the _SUCCESS marker of the adn_request input for this platform.
check_await "${ADN_REQUEST_INPUT_PATH}/${PLATFORM}/_SUCCESS"

# The dsp_req gate is currently disabled.
# check_await "${DSP_INPUT_PATH}/$PLATFORM/_SUCCESS"

# Two days before ScheduleTime, i.e. the day preceding date_path.
before_date_path="$(date +'%Y/%m/%d' -d "-2 day ${ScheduleTime}")"

OLD_ID_MAPPING_PATH="${ADS_DEVICE_ID_MAPPING}/${before_date_path}/${COUNTRY}/${PLATFORM}"

# Gate on the _SUCCESS marker of the previous day's "mid" output.
check_await "${OLD_ID_MAPPING_PATH}/mid/_SUCCESS"

# Root of this run's output; the job writes mid/, frequency/ and result/ under it.
OUTPUT_PATH="${ADS_DEVICE_ID_MAPPING}/${date_path}/${COUNTRY}/${PLATFORM}"

# Submit the IDMappingGraphx job to YARN in cluster mode.
# Sizing comes from partition/executors/cores/coalesce chosen earlier in this
# script; the job writes to mid/, frequency/ and result/ under OUTPUT_PATH.
# JAR is not set in this file (presumably provided by dmp_env.sh -- verify).
# Every expansion is quoted so that an unexpected space or glob character in a
# value cannot split or rewrite the command line.
# The script exits with status 255 when spark-submit returns non-zero.
if ! spark-submit --class mobvista.dmp.datasource.id_mapping.IDMappingGraphx \
  --name "IDMappingGraphx.${LOG_TIME}.${COUNTRY}.${PLATFORM}" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf "spark.sql.shuffle.partitions=${partition}" \
  --conf "spark.default.parallelism=${partition}" \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g \
  --executor-cores "${cores}" --num-executors "${executors}" \
  "../${JAR}" -date "${LOG_TIME}" -country "${COUNTRY}" -platform "${PLATFORM}" \
  -output "${OUTPUT_PATH}/mid" -fre_output "${OUTPUT_PATH}/frequency" \
  -result_output "${OUTPUT_PATH}/result" -coalesce "${coalesce}"; then
  exit 255
fi