#! /bin/bash
#
# Daily driver for the MergeInstallGenderLR Spark job.
#
# Steps:
#   1. Derive the run dates and HDFS paths from ScheduleTime.
#   2. Reset the output locations for this run.
#   3. Wait for the _SUCCESS markers of the upstream inputs.
#   4. Submit the Spark job; abort if it fails.
#   5. Mount the new ods_device_gender partition, unmount the 5-day-old one,
#      write the _SUCCESS marker and delete the 5-day-old binary output.
#
# Inputs not defined in this file (presumably supplied by ../dmp_env.sh or the
# scheduler -- verify): ScheduleTime, JAR, DM_INSTALL_LIST, INSTALL_GENDER_LR,
# INSTALL_GENDER_LR_BIN, GENDER_GET_*_PATH, and the helpers check_await,
# get_recently_date, mount_partition and unmount_partition.

source ../dmp_env.sh

# Recursive deletes below are built from these two roots. Refuse to continue
# if either is empty, otherwise the deletes would target paths at the HDFS root.
: "${INSTALL_GENDER_LR:?INSTALL_GENDER_LR must be set (expected from ../dmp_env.sh)}"
: "${INSTALL_GENDER_LR_BIN:?INSTALL_GENDER_LR_BIN must be set (expected from ../dmp_env.sh)}"

# ---------------------------------------------------------------------------
# Dates, all relative to ScheduleTime.
# ---------------------------------------------------------------------------
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
dt_yesterday=$(date +%Y%m%d -d "-2 day $ScheduleTime")
# NOTE(review): dt is not referenced again in this script; kept in case a
# sourced helper reads it.
dt=$(date +'%Y-%m-%d' -d "-1 day $ScheduleTime")
date_path=$(date +'%Y/%m/%d' -d "-1 day $ScheduleTime")
# Partition / binary output of this age is retired at the end of the run.
umount_date=$(date +%Y%m%d -d "-5 day $ScheduleTime")
umount_date_path=$(date +'%Y/%m/%d' -d "-5 day $ScheduleTime")

# ---------------------------------------------------------------------------
# Upstream inputs.
# ---------------------------------------------------------------------------
INSTALL_3S_INPUT="${DM_INSTALL_LIST}_v2/${date_path}/3s"
ADN_INSTALL_INPUT="${DM_INSTALL_LIST}_v2/${date_path}/adn_install"
ADN_REQUEST_INPUT="${DM_INSTALL_LIST}_v2/${date_path}/adn_request_sdk"
DSP_REQUEST_INPUT="${DM_INSTALL_LIST}_v2/${date_path}/dsp_req"

GA_INSTALL_DATE=$(get_recently_date "${DM_INSTALL_LIST}_v2" "${LOG_TIME}" "ga")
OTHER_DATE=$(get_recently_date "${DM_INSTALL_LIST}_v2" "${LOG_TIME}" "other")

DSP_GENDER_PATH="${GENDER_GET_DSP_PATH}/${date_path}"
GA_GENDER_PATH="${GENDER_GET_GA_PATH}/${date_path}"
FB_GENDER_PATH="${GENDER_GET_FACEBOOK_PATH}/${date_path}"
TP_GENDER_PATH="${GENDER_GET_THIRDPART_PATH}/${date_path}"
BM_GENDER_PATH="${GENDER_GET_BIG_MEDIA_PATH}/${date_path}"

# ---------------------------------------------------------------------------
# Outputs.
# ---------------------------------------------------------------------------
GENDER_OUTPUT="${INSTALL_GENDER_LR}/${date_path}"
UMOUNT_GENDER_OUTPUT="${INSTALL_GENDER_LR}/${umount_date_path}"

GENDER_BIN_OUTPUT="${INSTALL_GENDER_LR_BIN}/${LOG_TIME}"
RM_GENDER_BIN_OUTPUT="${INSTALL_GENDER_LR_BIN}/${umount_date}"

# Create the output directories if missing. For GENDER_OUTPUT this only has the
# lasting effect of creating the parent path, because the leaf directory is
# removed again just below.
if ! hadoop fs -test -e "${GENDER_OUTPUT}"; then
  hadoop fs -mkdir -p "${GENDER_OUTPUT}"
fi

if ! hadoop fs -test -e "${GENDER_BIN_OUTPUT}"; then
  hadoop fs -mkdir -p "${GENDER_BIN_OUTPUT}"
fi

ORG_OUTPUT_PATH="${GENDER_BIN_OUTPUT}/org"
CALC_OUTPUT_PATH="${GENDER_BIN_OUTPUT}/calc"

# Clear any output left by a previous attempt for the same date.
# NOTE(review): `hadoop fs -rmr` is deprecated in favour of `hadoop fs -rm -r`;
# left unchanged here to stay compatible with the deployed Hadoop client.
hadoop fs -rmr "${GENDER_OUTPUT}"
hadoop fs -rmr "${ORG_OUTPUT_PATH}"
hadoop fs -rmr "${CALC_OUTPUT_PATH}"

# ---------------------------------------------------------------------------
# Wait for upstream _SUCCESS markers, in the original order.
# The Facebook and big-media checks are intentionally disabled:
## check_await ${FB_GENDER_PATH}/_SUCCESS
## check_await ${BM_GENDER_PATH}/_SUCCESS
# ---------------------------------------------------------------------------
for upstream_dir in \
  "${INSTALL_3S_INPUT}" \
  "${ADN_INSTALL_INPUT}" \
  "${ADN_REQUEST_INPUT}" \
  "${DSP_REQUEST_INPUT}" \
  "${GA_GENDER_PATH}" \
  "${DSP_GENDER_PATH}" \
  "${TP_GENDER_PATH}"; do
  check_await "${upstream_dir}/_SUCCESS"
done

# ---------------------------------------------------------------------------
# Run the Spark job.
# ---------------------------------------------------------------------------
spark-submit --class mobvista.dmp.datasource.age_gender.MergeInstallGenderLR \
  --name "MergeInstallGenderLR" \
  --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \
  --conf spark.sql.shuffle.partitions=20000 \
  --conf spark.default.parallelism=20000 \
  --conf spark.speculation=false \
  --conf spark.speculation.quantile=0.9 \
  --conf spark.speculation.multiplier=1.5 \
  --conf spark.storage.memoryFraction=0.4 \
  --conf spark.shuffle.memoryFraction=0.4 \
  --conf spark.sql.files.maxPartitionBytes=134217728 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 100 \
  "../${JAR}" -date "${LOG_TIME}" -dt_yesterday "${dt_yesterday}" -ga_date "${GA_INSTALL_DATE}" -other_date "${OTHER_DATE}" \
  -gender_output "${GENDER_OUTPUT}" -gender_bin_output "${GENDER_BIN_OUTPUT}" -org_gender_bin_output "${ORG_OUTPUT_PATH}" -calc_gender_bin_output "${CALC_OUTPUT_PATH}" -parallelism 1000
spark_status=$?

# Stop here on failure: publishing the partition, writing _SUCCESS and deleting
# older data must only happen after a successful run.
if [ "${spark_status}" -ne 0 ]; then
  echo "MergeInstallGenderLR failed with exit status ${spark_status}" >&2
  exit "${spark_status}"
fi

# ---------------------------------------------------------------------------
# Publish the new partition and retire the 5-day-old data.
# ---------------------------------------------------------------------------
mount_partition "ods_device_gender" "dt='${LOG_TIME}'" "${GENDER_OUTPUT}"

unmount_partition "ods_device_gender" "dt='${umount_date}'" "${UMOUNT_GENDER_OUTPUT}"

hadoop fs -touchz "${GENDER_BIN_OUTPUT}/_SUCCESS"

if hadoop fs -test -e "${RM_GENDER_BIN_OUTPUT}"; then
  hadoop fs -rmr "${RM_GENDER_BIN_OUTPUT}"
fi