#!/usr/bin/env bash
#
# AppsFlyer daily -> total aggregation job.
#
# Steps:
#   1. Derive date anchors from ScheduleTime (run date minus 1 day; 10-day
#      and 6-day expiry windows).
#   2. Mount the day's input/output Hive partitions.
#   3. Run the AppsFlyerTotal Spark job to produce etl_af_org_total and
#      ods_dmp_user_info output for the day.
#   4. Unmount expired partitions.
#   5. Load dwh.dm_install_list_v2 via Hive from the fresh total table.
#   6. Publish _SUCCESS flags only after every step has succeeded.
#
# Requires: ScheduleTime, JAR, ETL_AF_ORG_NEW_DAILY, ETL_AF_ORG_TOTAL,
# ODS_DMP_USER_INFO, DM_INSTALL_LIST and the mount_partition /
# unmount_partition / hive_cmd helpers provided by the sourced files.

source ../dmp_env.sh
# NOTE(review): path was "././../ga_rawdata_analysis/..." — normalized, same target.
source ../ga_rawdata_analysis/common/tools.sh

# Date anchors, all relative to the scheduler-provided ScheduleTime.
today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt_today_dash=$(date -d "$ScheduleTime 1 days ago" +"%Y-%m-%d")
umount_time=$(date -d "$ScheduleTime 10 days ago" +"%Y%m%d")
umount_date_path=$(date -d "$ScheduleTime 10 days ago" +"%Y/%m/%d")
# 6 days before $today (GNU date accepts the Y/m/d form as a base date).
expire_path=$(date +%Y/%m/%d -d "-6 day $today")
expire_date=$(date +%Y%m%d -d "-6 day $today")

echo "${today}"

## INPUT_PATH="${ETL_AF_ORG_DAILY}/${today}"
INPUT_PATH="${ETL_AF_ORG_NEW_DAILY}/${dt_today}"
OUTPUT_TOTAL_PATH="${ETL_AF_ORG_TOTAL}/${today}"
UMOUNT_OUTPUT_TOTAL_PATH="${ETL_AF_ORG_TOTAL}/${umount_date_path}"
DMP_USER_INFO_OUTPUT_PATH="${ODS_DMP_USER_INFO}/${today}/appsflyer"
DMP_USER_INFO_UNMOUNT_PATH="${ODS_DMP_USER_INFO}/${expire_path}/appsflyer"
DMP_INSTALL_LIST_OUTPUT_PATH="${DM_INSTALL_LIST}_v2/${today}/appsflyer"

## check_await "${INPUT_PATH}/_SUCCESS"
## check_await "${AF_ORG_TOTAL_YES}/_SUCCESS"

## mount_partition "etl_af_org_daily" "dt='${dt_today}'" "$INPUT_PATH"
mount_partition "etl_appsflyer_audience_org" "dt='${dt_today}'" "${INPUT_PATH}"
mount_partition "dm_install_list_v2" "dt='${dt_today}', business='appsflyer'" "${DMP_INSTALL_LIST_OUTPUT_PATH}"

# Clear any partial output from a previous attempt (best-effort; the paths
# may legitimately not exist yet, so failures here are ignored).
hadoop fs -rm -r "${OUTPUT_TOTAL_PATH}/"
hadoop fs -rm -r "${DMP_USER_INFO_OUTPUT_PATH}/"
hadoop fs -rm -r "${DMP_INSTALL_LIST_OUTPUT_PATH}/"

mount_partition "etl_af_org_total" "dt='${dt_today}'" "${OUTPUT_TOTAL_PATH}"
mount_partition "ods_dmp_user_info" "dt='${dt_today}', business='appsflyer'" "${DMP_USER_INFO_OUTPUT_PATH}"

# Legacy Hive implementation, kept for reference (no-op string "comment").
: '
HIVE_CMD=$(hive_func)
$HIVE_CMD -v -hivevar dt_today ${dt_today} -hivevar update_date ${dt_today_dash} -f appsflyer_total.sql
'

spark-submit --class mobvista.dmp.datasource.appsflyer.AppsFlyerTotal \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism=10 \
  --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
  --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \
  --deploy-mode cluster --name apps_flyer_total \
  --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \
  ../"${JAR}" \
  -outputtotal "${OUTPUT_TOTAL_PATH}" -dmpuserinfo "${DMP_USER_INFO_OUTPUT_PATH}" \
  -coalesce 20 \
  -today "${dt_today}" -update_date "${dt_today_dash}" \
  || exit 255

# Expire partitions that have fallen outside the retention window.
unmount_partition "etl_af_org_total" "dt='${umount_time}'" "${UMOUNT_OUTPUT_TOTAL_PATH}"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='appsflyer'" "${DMP_USER_INFO_UNMOUNT_PATH}"

# FIX: the insert result was previously unchecked, so _SUCCESS flags could be
# published even when the dm_install_list_v2 load failed. Abort on failure.
hive_cmd "insert overwrite table dwh.dm_install_list_v2 partition(dt='${dt_today}',business='appsflyer')
 select device_id, device_type, platform, package_names, '${dt_today_dash}' update_date
 from dwh.etl_af_org_total where dt ='${dt_today}' " \
  || exit 255

# Ensure every output directory exists before flagging success (the Spark
# job or Hive load may have produced no files for an empty day).
if ! hadoop fs -test -e "${OUTPUT_TOTAL_PATH}"; then
  hadoop fs -mkdir "${OUTPUT_TOTAL_PATH}"
fi
if ! hadoop fs -test -e "${DMP_USER_INFO_OUTPUT_PATH}"; then
  hadoop fs -mkdir "${DMP_USER_INFO_OUTPUT_PATH}"
fi
if ! hadoop fs -test -e "${DMP_INSTALL_LIST_OUTPUT_PATH}"; then
  hadoop fs -mkdir "${DMP_INSTALL_LIST_OUTPUT_PATH}"
fi

# Publish completion flags for downstream consumers.
hadoop fs -touchz "${OUTPUT_TOTAL_PATH}/_SUCCESS"
hadoop fs -touchz "${DMP_USER_INFO_OUTPUT_PATH}/_SUCCESS"
hadoop fs -touchz "${DMP_INSTALL_LIST_OUTPUT_PATH}/_SUCCESS"