#!/usr/bin/env bash

source ../dmp_env.sh
source ././../ga_rawdata_analysis/common/tools.sh

dt_today=$(date -d "$ScheduleTime 2 days ago" +"%Y%m%d")
dt_yesterday=$(date -d "$ScheduleTime 3 days ago" +"%Y%m%d")
dt_oneday_ago=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
syn_3s_day=$(date -d "$ScheduleTime 2 days ago" +"%m%d")
dt_slash_today=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")
dt_dash_today=$(date -d "$ScheduleTime 2 days ago" +"%Y-%m-%d")
dt_dash_rec15day=$(date -d "$ScheduleTime 16 days ago" +"%Y-%m-%d")

expire_path=$(date +%Y/%m/%d -d "-7 day $today")
expire_date=$(date +%Y%m%d -d "-7 day $today")

DMP_ALI_ACTIVATION_USER_INFO_OUTPUT_PATH="${ODS_DMP_USER_INFO}/${dt_slash_today}/ali_activation"
DMP_ALI_ACQUISTION_USER_INFO_OUTPUT_PATH="${ODS_DMP_USER_INFO}/${dt_slash_today}/ali_acquisition"
DMP_ALI_ACTIVATION_INSTALL_LIST_OUTPUT_PATH="${DM_INSTALL_LIST}_v2/${dt_slash_today}/ali_activation"
DMP_ALI_ACQUISTION_INSTALL_LIST_OUTPUT_PATH="${DM_INSTALL_LIST}_v2/${dt_slash_today}/ali_acquisition"
DMP_ALI_ACTIVATION_USER_INFO_UNMOUNT_PATH="${ODS_DMP_USER_INFO}/${expire_path}/ali_activation"
DMP_ALI_ACQUISTION_USER_INFO_UNMOUNT_PATH="${ODS_DMP_USER_INFO}/${expire_path}/ali_acquisition"

## Pull the device-tagging postbacks from OSS
# input_file_name()

ALI_POSTBACK_PATH="${ALI_USER_ACTIVATION_POSTBACK_PATH}/${dt_slash_today}"
check_await "${ALI_POSTBACK_PATH}/_SUCCESS"
ALI_IOS_POSTBACK_PATH="${ALI_IOS_USER_ACTIVATION_POSTBACK_PATH}/${dt_slash_today}"
check_await "${ALI_IOS_POSTBACK_PATH}/_SUCCESS"
ALI_OAID_POSTBACK_PATH="${ALI_OAID_USER_ACTIVATION_POSTBACK_PATH}/${dt_slash_today}"
check_await "${ALI_OAID_POSTBACK_PATH}/_SUCCESS"

## If there is no new-user acquisition data for the current day, fall back to the previous day's data
mount_partition "ali_user_postback_activation_daily" "dt='${dt_today}'" "$ALI_POSTBACK_PATH"
mount_partition "ali_ios_user_postback_activation_daily" "dt='${dt_today}'" "$ALI_IOS_POSTBACK_PATH"
mount_partition "ali_oaid_user_postback_activation_daily" "dt='${dt_today}'" "$ALI_OAID_POSTBACK_PATH"

OUTPUT_PATH="${ETL_ALI_USERINFO_ACTIVATION_PATH}/${dt_slash_today}"
ALI_IOS_OUTPUT="${ETL_ALI_IOS_USERINFO_ACTIVATION_PATH}/${dt_slash_today}"
ALI_OAID_OUTPUT="${ETL_ALI_OAID_USERINFO_ACTIVATION_PATH}/${dt_slash_today}"
ALI_USER_ACTIVATION_SYS_TO3S_PATH="$ALI_USER_ACTIVATION_SYS_TO3S/${dt_slash_today}"
ALI_USER_TO_3S_RES_PATH="${ALI_USERINFO_TO_3S_PATH}/${dt_slash_today}"
ALI_USER_TO_3S_TOUTIAO_RES_PATH="${ALI_USERINFO_TO_3S_TOUTIAO_PATH}/${dt_slash_today}"
ALI_USERINFO_TO_3S_GUANGDIANTONG_RES_PATH="${ALI_USERINFO_TO_3S_GUANGDIANTONG_PATH}/${dt_slash_today}"

# Clear today's output directories before re-running
hadoop fs -rm -r "$OUTPUT_PATH"
hadoop fs -rm -r "${DMP_ALI_ACTIVATION_USER_INFO_OUTPUT_PATH}"
hadoop fs -rm -r "${DMP_ALI_ACQUISTION_USER_INFO_OUTPUT_PATH}"
hadoop fs -rm -r "${DMP_ALI_ACTIVATION_INSTALL_LIST_OUTPUT_PATH}"
hadoop fs -rm -r "${DMP_ALI_ACQUISTION_INSTALL_LIST_OUTPUT_PATH}"
hadoop fs -rm -r "${ALI_USER_ACTIVATION_SYS_TO3S_PATH}"
hadoop fs -rm -r "${ALI_USER_TO_3S_RES_PATH}"
hadoop fs -rm -r "${ALI_USER_TO_3S_TOUTIAO_RES_PATH}"
hadoop fs -rm -r "${ALI_USERINFO_TO_3S_GUANGDIANTONG_RES_PATH}"

# Mount today's user-info partitions and unmount the expired (7-day-old) ones
mount_partition "ods_dmp_user_info" "dt='${dt_today}', business='ali_activation'" "${DMP_ALI_ACTIVATION_USER_INFO_OUTPUT_PATH}"
mount_partition "ods_dmp_user_info" "dt='${dt_today}', business='ali_acquisition'" "${DMP_ALI_ACQUISTION_USER_INFO_OUTPUT_PATH}"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='ali_activation'" "${DMP_ALI_ACTIVATION_USER_INFO_UNMOUNT_PATH}"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='ali_acquisition'" "${DMP_ALI_ACQUISTION_USER_INFO_UNMOUNT_PATH}"
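# Note: check_await, mount_partition and unmount_partition come from the sourced
# dmp_env.sh / tools.sh and are not defined in this file. As a rough, hypothetical
# sketch of the behavior the calls above assume (wrapping Hive ALTER TABLE
# ADD/DROP PARTITION), they likely look something like the following; the real
# implementations live in the shared tools and may differ:
#
#   mount_partition() {     # mount_partition <table> <partition_spec> <hdfs_location>
#       local table=$1 spec=$2 location=$3
#       hive -e "ALTER TABLE ${table} ADD IF NOT EXISTS PARTITION (${spec}) LOCATION '${location}';"
#   }
#
#   unmount_partition() {   # unmount_partition <table> <partition_spec> <hdfs_location>
#       local table=$1 spec=$2 location=$3
#       hive -e "ALTER TABLE ${table} DROP IF EXISTS PARTITION (${spec});"
#   }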
mount_partition "etl_ali_user_activation_total" "dt='${dt_today}'" "$OUTPUT_PATH" mount_partition "etl_ali_ios_user_activation_total" "dt='${dt_today}'" "$ALI_IOS_OUTPUT" mount_partition "etl_ali_oaid_user_activation_total" "dt='${dt_today}'" "$ALI_OAID_OUTPUT" mount_partition "dm_install_list_v2" "dt='${dt_today}', business='ali_activation'" "${DMP_ALI_ACTIVATION_INSTALL_LIST_OUTPUT_PATH}" mount_partition "dm_install_list_v2" "dt='${dt_today}', business='ali_acquisition'" "${DMP_ALI_ACQUISTION_INSTALL_LIST_OUTPUT_PATH}" mount_partition "gdt_data" "day='${dt_today}', category='all', business='total'" "${ALI_USER_ACTIVATION_SYS_TO3S_PATH}" spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionPostBackDaily \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=2000 \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 60 \ ../${JAR} -output ${OUTPUT_PATH} -iosoutput ${ALI_IOS_OUTPUT} -oaidoutput ${ALI_OAID_OUTPUT} -coalesce 50 \ -today ${dt_today} -update_date ${dt_dash_today} \ -dt_dash_rec15day ${dt_dash_rec15day} -syn_to_3s ${ALI_USER_ACTIVATION_SYS_TO3S_PATH} -syn_3s_day ${syn_3s_day} if [ $? -ne 0 ];then exit 255 fi HIVE_CMD=$(hive_func) $HIVE_CMD -v -hivevar dt_today ${dt_today} -hivevar update_date ${dt_dash_today} -hivevar dt_yesterday ${dt_yesterday} -hivevar dt_oneday_ago ${dt_oneday_ago} -f ali_userinfo_postback_activation_daily.sql hadoop fs -test -e $OUTPUT_PATH if [ $? -ne 0 ];then hadoop fs -mkdir $OUTPUT_PATH fi hadoop fs -test -e ${DMP_ALI_ACTIVATION_USER_INFO_OUTPUT_PATH} if [ $? -ne 0 ];then hadoop fs -mkdir ${DMP_ALI_ACTIVATION_USER_INFO_OUTPUT_PATH} fi hadoop fs -test -e ${DMP_ALI_ACQUISTION_USER_INFO_OUTPUT_PATH} if [ $? -ne 0 ];then hadoop fs -mkdir ${DMP_ALI_ACQUISTION_USER_INFO_OUTPUT_PATH} fi hadoop fs -test -e ${DMP_ALI_ACTIVATION_INSTALL_LIST_OUTPUT_PATH} if [ $? -ne 0 ];then hadoop fs -mkdir ${DMP_ALI_ACTIVATION_INSTALL_LIST_OUTPUT_PATH} fi hadoop fs -test -e ${DMP_ALI_ACQUISTION_INSTALL_LIST_OUTPUT_PATH} if [ $? -ne 0 ];then hadoop fs -mkdir ${DMP_ALI_ACQUISTION_INSTALL_LIST_OUTPUT_PATH} fi hadoop fs -touchz $OUTPUT_PATH/_SUCCESS hadoop fs -touchz $ALI_IOS_OUTPUT/_SUCCESS hadoop fs -touchz $ALI_OAID_OUTPUT/_SUCCESS hadoop fs -touchz ${DMP_ALI_ACTIVATION_USER_INFO_OUTPUT_PATH}/_SUCCESS hadoop fs -touchz ${DMP_ALI_ACQUISTION_USER_INFO_OUTPUT_PATH}/_SUCCESS hadoop fs -touchz ${DMP_ALI_ACQUISTION_INSTALL_LIST_OUTPUT_PATH}/_SUCCESS hadoop fs -touchz ${DMP_ALI_ACTIVATION_INSTALL_LIST_OUTPUT_PATH}/_SUCCESS #推快手数据 hadoop fs -test -e ${ALI_USER_TO_3S_RES_PATH} if [ $? -ne 0 ];then hadoop fs -mkdir -p ${ALI_USER_TO_3S_RES_PATH} fi dirs=$(hadoop fs -ls ${ALI_USER_ACTIVATION_SYS_TO3S_PATH} | awk -F'/' '{print $12}' | grep -E "imei|idfa") for path in $dirs;do hadoop fs -text ${ALI_USER_ACTIVATION_SYS_TO3S_PATH}/${path}/* > ${path}.csv split -b 500m ${path}.csv -d -a 2 ${path}_ hadoop fs -put ${path}_* "${ALI_USER_TO_3S_RES_PATH}/" rm -f ${path}* done hadoop fs -touchz ${ALI_USER_TO_3S_RES_PATH}/_SUCCESS #推头条数据 hadoop fs -test -e ${ALI_USER_TO_3S_TOUTIAO_RES_PATH} if [ $? 
# Push data to Toutiao
hadoop fs -test -e ${ALI_USER_TO_3S_TOUTIAO_RES_PATH}
if [ $? -ne 0 ]; then
    hadoop fs -mkdir -p ${ALI_USER_TO_3S_TOUTIAO_RES_PATH}
fi

dirs=$(hadoop fs -ls ${ALI_USER_ACTIVATION_SYS_TO3S_PATH} | awk -F'/' '{print $12}' | grep -E "imei|idfa")
for path in $dirs; do
    hadoop fs -text ${ALI_USER_ACTIVATION_SYS_TO3S_PATH}/${path}/* > ${path}.csv
    # Toutiao takes smaller files: split into 55 MB pieces, run TouTiaoUtil over each piece, then zip and upload the result
    split -b 55m ${path}.csv -d -a 2 ${path}_
    for new_path in $(ls ${path}_*); do
        echo ${new_path}
        device_id=$(echo ${new_path} | awk -F '_' '{print $(NF-1)}')
        echo $device_id
        result_path=${new_path#*_}
        java -cp ../${JAR} mobvista.dmp.datasource.toutiao.dmp.TouTiaoUtil ${new_path} ${result_path}_1 ${device_id}
        zip ${result_path}_1.zip ${result_path}_1
        hadoop fs -put ${result_path}_1.zip "${ALI_USER_TO_3S_TOUTIAO_RES_PATH}/"
        rm -f ${new_path} ${result_path}*
    done
    rm -f ${path}.csv
done
hadoop fs -touchz ${ALI_USER_TO_3S_TOUTIAO_RES_PATH}/_SUCCESS

# Push data to Guangdiantong (GDT)
hadoop fs -test -e ${ALI_USERINFO_TO_3S_GUANGDIANTONG_RES_PATH}
if [ $? -ne 0 ]; then
    hadoop fs -mkdir -p ${ALI_USERINFO_TO_3S_GUANGDIANTONG_RES_PATH}
fi

dirs=$(hadoop fs -ls ${ALI_USER_ACTIVATION_SYS_TO3S_PATH} | awk -F'/' '{print $12}' | grep -E "imei|idfa")
for path in $dirs; do
    tmp_result_path=${path#*_}
    echo $tmp_result_path
    if [[ $tmp_result_path =~ "idfa" ]]
    then
        result_path=${tmp_result_path%_*}_HASH_IDFA
        echo $result_path
    elif [[ $tmp_result_path =~ "imei" ]]; then
        result_path=${tmp_result_path%_*}_HASH_IMEI
        echo $result_path
    else
        echo "error"
        exit 1
    fi
    hadoop fs -text ${ALI_USER_ACTIVATION_SYS_TO3S_PATH}/${path}/* > ${result_path}.csv
    split -b 160m ${result_path}.csv -d -a 1 ${result_path}_
    for new_path in $(ls ${result_path}_*); do
        zip ${new_path}.zip ${new_path}
        hadoop fs -put ${new_path}.zip "${ALI_USERINFO_TO_3S_GUANGDIANTONG_RES_PATH}/"
        rm -f ${new_path}*
    done
    rm -f ${result_path}.csv
done
hadoop fs -touchz ${ALI_USERINFO_TO_3S_GUANGDIANTONG_RES_PATH}/_SUCCESS

ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH="${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_PATH}/${dt_slash_today}"
ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH="${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_PATH}/${dt_slash_today}"
ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH_TMP="${GDT_DATA}/day=${dt_today}/category=append/"
ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH_TMP="${GDT_DATA}/day=${dt_today}/category=delete/"

# Push GDT append (newly added) data
hadoop fs -test -e ${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH}
if [ $? -ne 0 ]; then
    hadoop fs -mkdir -p ${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH}
fi

dirs=$(hadoop fs -ls ${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH_TMP} | awk -F'/' '{print $11}' | grep -E "imei|idfa")
for path in $dirs; do
    tmp_result_path=${path#*=}
    echo $tmp_result_path
    if [[ $tmp_result_path =~ "idfa" ]]
    then
        result_path=${tmp_result_path%_*}_HASH_IDFA
        echo $result_path
    elif [[ $tmp_result_path =~ "imei" ]]; then
        result_path=${tmp_result_path%_*}_HASH_IMEI
        echo $result_path
    else
        echo "error"
        exit 1
    fi
    hadoop fs -text ${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH_TMP}/${path}/* > ${result_path}.csv
    split -b 160m ${result_path}.csv -d -a 1 ${result_path}_
    for new_path in $(ls ${result_path}_*); do
        zip ${new_path}.zip ${new_path}
        hadoop fs -put ${new_path}.zip "${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH}/"
        rm -f ${new_path}*
    done
    rm -f ${result_path}.csv
done
hadoop fs -touchz ${ALI_USERINFO_TO_3S_GUANGDIANTONG_APPEND_RES_PATH}/_SUCCESS
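# For reference, the parameter expansions used in the GDT loops behave as follows. The directory
# name below is made up purely for illustration; the real names come from the HDFS listings and
# are only known to contain "imei" or "idfa":
#
#   path='device_type=gdt_imei_md5'
#   tmp_result_path=${path#*=}             # -> 'gdt_imei_md5'   (strip everything up to the first '=')
#   result_path=${tmp_result_path%_*}      # -> 'gdt_imei'       (strip the last '_' and its suffix)
#   result_path=${result_path}_HASH_IMEI   # -> 'gdt_imei_HASH_IMEI'
#
# The first GDT loop uses ${path#*_} instead, stripping up to the first '_' rather than '='.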
# Push GDT delete data
hadoop fs -test -e ${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH}
if [ $? -ne 0 ]; then
    hadoop fs -mkdir -p ${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH}
fi

dirs=$(hadoop fs -ls ${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH_TMP} | awk -F'/' '{print $11}' | grep -E "imei|idfa")
for path in $dirs; do
    tmp_result_path=${path#*=}
    echo $tmp_result_path
    if [[ $tmp_result_path =~ "idfa" ]]
    then
        result_path=${tmp_result_path%_*}_HASH_IDFA
        echo $result_path
    elif [[ $tmp_result_path =~ "imei" ]]; then
        result_path=${tmp_result_path%_*}_HASH_IMEI
        echo $result_path
    else
        echo "error"
        exit 1
    fi
    hadoop fs -text ${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH_TMP}/${path}/* > ${result_path}.csv
    split -b 160m ${result_path}.csv -d -a 1 ${result_path}_
    for new_path in $(ls ${result_path}_*); do
        zip ${new_path}.zip ${new_path}
        hadoop fs -put ${new_path}.zip "${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH}/"
        rm -f ${new_path}*
    done
    rm -f ${result_path}.csv
done
hadoop fs -touchz ${ALI_USERINFO_TO_3S_GUANGDIANTONG_DELETE_RES_PATH}/_SUCCESS
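# The three GDT export loops above (full, append, delete) differ only in their source and
# destination directories. If this script is ever refactored, a shared helper could restate the
# steps already performed above. push_gdt_dir is hypothetical and not an existing function; the
# sketch mirrors the append/delete loops (which key on '='):
#
#   push_gdt_dir() {   # push_gdt_dir <src_hdfs_dir> <partition_dir_name> <dest_hdfs_dir>
#       local src=$1 path=$2 dest=$3 tmp result chunk
#       tmp=${path#*=}
#       case "$tmp" in
#           *idfa*) result=${tmp%_*}_HASH_IDFA ;;
#           *imei*) result=${tmp%_*}_HASH_IMEI ;;
#           *) echo "error"; return 1 ;;
#       esac
#       hadoop fs -text ${src}/${path}/* > ${result}.csv
#       split -b 160m ${result}.csv -d -a 1 ${result}_
#       for chunk in ${result}_*; do
#           zip ${chunk}.zip ${chunk}
#           hadoop fs -put ${chunk}.zip "${dest}/"
#           rm -f ${chunk}*
#       done
#       rm -f ${result}.csv
#   }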