#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file  : appid_package.sh
# @author: houying
# @date  : 17-2-10
# # # # # # # # # # # # # # # # # # # # # #
#
# Builds the daily appid -> package mapping:
#   1. Derives the log date (ScheduleTime - 1 day) and the previous date
#      (ScheduleTime - 2 days).
#   2. Clears the output directory for the log date.
#   3. Runs the AppidPackageDictMR MapReduce job with the log date's setting
#      data and the previous date's mapping as inputs.
#   4. Mounts the output directory as the yyyy/mm/dd partition of
#      ods_adn_ngix_setting_global.
#
# Inputs read from the environment:
#   ScheduleTime          - reference date handed to `date -d`
#   NGINX_SETTING_DAILY   - base path of the daily setting input
#   APP_ID_MAPPING        - base path of the mapping data (input and output)
#   JAR                   - job jar file name, resolved relative to ..
# NOTE(review): the three path/jar variables and the mount_partition function
# are presumably provided by ../dmp_env.sh - confirm against that file.
#
# The relative paths (../dmp_env.sh, ../${JAR}) mean the script must be started
# from its own directory.

# Abort if the environment cannot be loaded: every path below is built from it.
source ../dmp_env.sh || exit 1

# Refuse to continue with empty settings. An empty base path would otherwise
# make the cleanup below operate on "/yyyy/mm/dd/*" at the filesystem root.
: "${NGINX_SETTING_DAILY:?NGINX_SETTING_DAILY must be set}"
: "${APP_ID_MAPPING:?APP_ID_MAPPING must be set}"
: "${JAR:?JAR must be set}"

# Log date: one day before ScheduleTime. A failed date parse would yield empty
# path components, so stop right away.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d") || exit 1
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

# Previous date: two days before ScheduleTime; its mapping output is fed back
# into today's job as an input.
YES_TIME=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d") || exit 1
yes_year=${YES_TIME:0:4}
yes_month=${YES_TIME:4:2}
yes_day=${YES_TIME:6:2}

SETTING_INPUT_PATH="$NGINX_SETTING_DAILY/$year/$month/$day"
APPID_INPUT_PATH="$APP_ID_MAPPING/$yes_year/$yes_month/$yes_day"
OUTPUT_PATH="$APP_ID_MAPPING/$year/$month/$day"

# Remove any previous output for this date. The glob is quoted on purpose so
# that it is expanded by the hadoop shell, not by bash. -f keeps a first run
# (nothing to delete yet) from reporting an error.
hadoop fs -rm -r -f "$OUTPUT_PATH/*"

hadoop jar "../${JAR}" mobvista.dmp.datasource.setting.AppidPackageDictMR \
  -Dmapreduce.job.reduces=30 \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  "$SETTING_INPUT_PATH" "$APPID_INPUT_PATH" "$OUTPUT_PATH" || exit 1

# Register the fresh output as a table partition; a failure here must not be
# reported as a successful job.
mount_partition "ods_adn_ngix_setting_global" "yyyy='${year}',mm='${month}',dd='${day}'" "$OUTPUT_PATH" || exit 1

# Disabled alternative implementation (Spark job followed by distcp), kept for
# reference only. It was previously parked inside a no-op `: '...'` string and
# has never been executed by this script.
#
# spark-submit --class mobvista.dmp.datasource.setting.SettingTotal \
#  --conf spark.network.timeout=720s \
#  --conf spark.default.parallelism=30 \
#  --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
#  --master yarn --deploy-mode cluster --name apps_flyer_total --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \
#  ../${JAR} -outputtotal ${APP_ID_MAPPING_TMP} \
#  -coalesce 30 \
#  -today ${LOG_TIME}
#
# hadoop fs -rmr $OUTPUT_PATH
# hadoop distcp -m4 ${APP_ID_MAPPING_TMP}/* $OUTPUT_PATH/

echo "[Appid Package Mapping Job End!]"