#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file       : get_ga_all.sh
# @author     : wangjf
# @revision   : 2018-09-26 10:45:51
# @description: refactoring
# # # # # # # # # # # # # # # # # # # # # #
#
# Submits the mobvista.dmp.datasource.age_gender.GetAgeGender Spark job for
# the "ga" business, then mounts the new gender partition on ods_gender_ga,
# unmounts the partition from three days before ScheduleTime, and writes a
# _SUCCESS marker into the gender output directory.
#
# Inputs read but not defined in this file (presumably provided by the
# scheduler and by ../dmp_env.sh -- verify there):
#   ScheduleTime, GA_TOTAL_PATH, AGE_GET_GA_PATH, GENDER_GET_GA_PATH, JAR
# Helpers called but not defined in this file:
#   get_recently_date, get_recently_dir, check_await, calculate_reduce_num,
#   mount_partition, unmount_partition
#
# Exit status: 255 when spark-submit fails; non-zero when the environment
# file cannot be sourced or a required value is empty.
#
# NOTE: `set -e` is intentionally not enabled; the `hadoop fs -rm -r` calls
# below are allowed to return non-zero.

# The path is relative to the current working directory, so the script has to
# be started from its own directory.
source ../dmp_env.sh || {
  echo "get_ga_all.sh: cannot source ../dmp_env.sh" >&2
  exit 1
}

# All dates are derived from ScheduleTime.
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
umount_time=$(date -d "$ScheduleTime 3 days ago" +"%Y%m%d")
umount_date_path=$(date -d "$ScheduleTime 3 days ago" +"%Y/%m/%d")
# Not referenced anywhere else in this script; kept in case a sourced helper
# reads it -- NOTE(review): confirm and drop if unused.
YES_TIME=$(date +%Y%m%d -d "-2 day $ScheduleTime")

## year=${LOG_TIME:0:4}
## month=${LOG_TIME:4:2}
## day=${LOG_TIME:6:2}

GA_TOTAL_DATE=$(get_recently_date "${GA_TOTAL_PATH}" "${LOG_TIME}" "")
# An empty value would otherwise shift the job's argument list (-date would be
# followed directly by -business).
: "${GA_TOTAL_DATE:?get_recently_date returned no date for GA_TOTAL_PATH}"

# yes_date_path=$(date +'%Y/%m/%d' -d "-2 day $ScheduleTime")
# JOB_NAME="age_prepare_ga"
# INPUT_PATH_GA=$(get_recently_dir "${GA_TOTAL_PATH}" "${LOG_TIME}" "")
# INPUT_PATH_GA="${GA_TOTAL_PATH}/${yes_date_path}"

# These base paths feed recursive deletes below. Abort if either is empty so
# the delete can never target a root-level /YYYY/MM/DD/ path.
AGE_OUTPUT_PATH="${AGE_GET_GA_PATH:?AGE_GET_GA_PATH must be set and non-empty}/${date_path}/"
GENDER_OUTPUT_PATH="${GENDER_GET_GA_PATH:?GENDER_GET_GA_PATH must be set and non-empty}/${date_path}/"
GENDER_UMOUNT_OUTPUT_PATH="${GENDER_GET_GA_PATH}/${umount_date_path}/"

INPUT_PATH_GA=$(get_recently_dir "${GA_TOTAL_PATH}" "${LOG_TIME}" "")
# Without this guard an empty result would make check_await look for a bare
# "_SUCCESS" path.
: "${INPUT_PATH_GA:?get_recently_dir returned no directory for GA_TOTAL_PATH}"

# "_SUCCESS" is appended without a separator, exactly as before; this presumes
# get_recently_dir returns a path ending in "/" -- TODO confirm.
check_await "${INPUT_PATH_GA}_SUCCESS"

# Legacy MapReduce implementation, disabled (previously wrapped in a `: '...'`
# no-op). Kept for reference only:
#
#   hadoop fs -rm -r ${OUTPUT_PATH}
#
#   hadoop jar ../${JAR} mobvista.dmp.datasource.age.mapreduce.GetGaAgeMR \
#     -Dmapreduce.job.reduces=60 \
#     "${INPUT_PATH_GA}" "${OUTPUT_PATH}" "${year}" || exit 1

parallelism_num=$(calculate_reduce_num "${INPUT_PATH_GA}")

business="ga"

# Clear output left by a previous run. The exit status is deliberately not
# checked: the directories may not exist yet.
hadoop fs -rm -r "${AGE_OUTPUT_PATH}/"
hadoop fs -rm -r "${GENDER_OUTPUT_PATH}/"

# spark.speculation is false, so the speculation quantile/multiplier settings
# have no effect; they are kept so that enabling speculation is a one-value
# change.
if ! spark-submit --class mobvista.dmp.datasource.age_gender.GetAgeGender \
  --name "mobvista.dmp.datasource.age_gender.GetAgeGender.${business}.${LOG_TIME}" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf "spark.sql.shuffle.partitions=${parallelism_num}" \
  --conf "spark.default.parallelism=${parallelism_num}" \
  --conf spark.speculation=false \
  --conf spark.speculation.quantile=0.9 \
  --conf spark.speculation.multiplier=1.5 \
  --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC" \
  --deploy-mode cluster \
  --executor-memory 6g \
  --driver-memory 4g \
  --executor-cores 2 \
  --num-executors 50 \
  "../${JAR}" \
  -ageOutput "${AGE_OUTPUT_PATH}" \
  -genderOutput "${GENDER_OUTPUT_PATH}" \
  -date "${GA_TOTAL_DATE}" \
  -business "${business}"; then
  exit 255
fi

# NOTE(review): the return status of the two partition helpers is not checked
# (unchanged from the original); their failure conventions are not visible
# from this file.
mount_partition "ods_gender_ga" "dt='${LOG_TIME}'" "${GENDER_OUTPUT_PATH}"

unmount_partition "ods_gender_ga" "dt='${umount_time}'" "${GENDER_UMOUNT_OUTPUT_PATH}"

# Write the completion marker into the gender output directory.
hadoop fs -touchz "${GENDER_OUTPUT_PATH}/_SUCCESS"