#!/usr/bin/env bash

# Daily AppsFlyer "total" aggregation driver.
# Merges yesterday's AppsFlyer org data into the cumulative tables and
# publishes dm_install_list_v2 / ods_dmp_user_info output for the
# 'appsflyer' business.
#
# Requires: ScheduleTime set by the scheduler (a date string accepted by
# GNU `date -d`); dmp_env.sh and tools.sh provide the env vars and the
# mount_partition/unmount_partition/hive_cmd helpers used below.

source ../dmp_env.sh
# was `././../` — redundant `./.` segments, same directory either way
source ../ga_rawdata_analysis/common/tools.sh

# Business date = ScheduleTime minus 1 day, in the three formats used below.
today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt_today_dash=$(date -d "$ScheduleTime 1 days ago" +"%Y-%m-%d")

# Retention horizons: partitions older than these are unmounted at the end.
umount_time=$(date -d "$ScheduleTime 10 days ago" +"%Y%m%d")
umount_date_path=$(date -d "$ScheduleTime 10 days ago" +"%Y/%m/%d")

expire_path=$(date +%Y/%m/%d -d "-6 day $today")
expire_date=$(date +%Y%m%d -d "-6 day $today")

echo "${today}"


# --- HDFS path layout --------------------------------------------------

# Daily AppsFlyer org input (new-style layout keyed by yyyymmdd).
## INPUT_PATH="${ETL_AF_ORG_DAILY}/${today}"
INPUT_PATH="${ETL_AF_ORG_NEW_DAILY}/${dt_today}"

# Cumulative "total" output for today, and the aged partition to unmount.
OUTPUT_TOTAL_PATH="${ETL_AF_ORG_TOTAL}/${today}"
UMOUNT_OUTPUT_TOTAL_PATH="${ETL_AF_ORG_TOTAL}/${umount_date_path}"

# Per-business user-info output for today and its expired counterpart.
DMP_USER_INFO_OUTPUT_PATH="${ODS_DMP_USER_INFO}/${today}/appsflyer"
DMP_USER_INFO_UNMOUNT_PATH="${ODS_DMP_USER_INFO}/${expire_path}/appsflyer"

# Install-list (v2) output slice for the appsflyer business.
DMP_INSTALL_LIST_OUTPUT_PATH="${DM_INSTALL_LIST}_v2/${today}/appsflyer"

# Input-readiness gates, currently disabled.
## check_await "${INPUT_PATH}/_SUCCESS"
## check_await "${AF_ORG_TOTAL_YES}/_SUCCESS"

# Register today's input partition in the Hive metastore
# (mount_partition comes from the sourced tools.sh).
## mount_partition "etl_af_org_daily" "dt='${dt_today}'" "$INPUT_PATH"
mount_partition "etl_appsflyer_audience_org" "dt='${dt_today}'" "$INPUT_PATH"

# Pre-mount today's install-list partition so the final INSERT OVERWRITE
# below lands in a registered location.
mount_partition "dm_install_list_v2" "dt='${dt_today}', business='appsflyer'" "${DMP_INSTALL_LIST_OUTPUT_PATH}"

# Clear any previous output for today so the job is safely re-runnable.
# NOTE(review): failures are ignored here — the paths may simply not
# exist yet on a first run.
hadoop fs -rm -r "${OUTPUT_TOTAL_PATH}/"
hadoop fs -rm -r "${DMP_USER_INFO_OUTPUT_PATH}/"
hadoop fs -rm -r "${DMP_INSTALL_LIST_OUTPUT_PATH}/"

# Mount today's output partitions before the Spark job writes them.
mount_partition "etl_af_org_total" "dt='${dt_today}'" "${OUTPUT_TOTAL_PATH}"
mount_partition "ods_dmp_user_info" "dt='${dt_today}', business='appsflyer'" "${DMP_USER_INFO_OUTPUT_PATH}"
# Legacy Hive implementation, disabled (the `: '…'` no-op swallows it);
# kept for reference.
: '
HIVE_CMD=$(hive_func)

$HIVE_CMD	-v  -hivevar  dt_today ${dt_today}     -hivevar update_date ${dt_today_dash}  -f	appsflyer_total.sql
'

# Merge yesterday's AppsFlyer data into the running total and emit the
# user-info rows. The original scrape of this file had stray web-UI
# residue ("66", committer lines) spliced between the continuation
# lines, which severed the command — reassembled here as one command.
spark-submit --class mobvista.dmp.datasource.appsflyer.AppsFlyerTotal \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism=10 \
  --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
  --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \
  --master yarn --deploy-mode cluster --name apps_flyer_total \
  --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \
  ../"${JAR}" -outputtotal "${OUTPUT_TOTAL_PATH}" -dmpuserinfo "${DMP_USER_INFO_OUTPUT_PATH}" \
  -coalesce 20 \
  -today "${dt_today}" -update_date "${dt_today_dash}"

# Abort the whole pipeline if the Spark job failed.
if [ $? -ne 0 ]; then
  exit 255
fi


# Drop partitions that have aged past the retention window
# (10 days for the total table, 6 days for user info — see the
# umount_time/expire_date derivations at the top).
unmount_partition "etl_af_org_total" "dt='${umount_time}'" "${UMOUNT_OUTPUT_TOTAL_PATH}"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='appsflyer'" "${DMP_USER_INFO_UNMOUNT_PATH}"

# Rebuild today's appsflyer slice of dm_install_list_v2 from the freshly
# written total table (hive_cmd comes from the sourced tools.sh).
hive_cmd  "insert overwrite table dwh.dm_install_list_v2 partition(dt='${dt_today}',business='appsflyer')
     select device_id,
     device_type,
     platform,
     package_names,
     '${dt_today_dash}' update_date
     from dwh.etl_af_org_total where dt ='${dt_today}' "



# Ensure every output directory exists, then publish a _SUCCESS flag in
# each so downstream check_await consumers can proceed.
# Fixes vs original: path variables are quoted, `-mkdir -p` tolerates
# missing parent directories, and the triplicated test/mkdir/touchz
# sequence is collapsed into one loop.
for out_dir in "${OUTPUT_TOTAL_PATH}" "${DMP_USER_INFO_OUTPUT_PATH}" "${DMP_INSTALL_LIST_OUTPUT_PATH}"; do
  if ! hadoop fs -test -e "${out_dir}"; then
    hadoop fs -mkdir -p "${out_dir}"
  fi
  hadoop fs -touchz "${out_dir}/_SUCCESS"
done