Commit 5f348fef by wang-jinfeng

init mobvista-dmp

parents

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

type=command
dependencies=3s_tracking_install_total_v2
command=sh -x 3s_install_device_tag.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision: 2017-11-16
# @desc : match interest tags for the day's 3s install list and publish
#         them as manual/store partitions of dmp_device_tag.
#         (The original header mentioned ods_dmp_user_info, which this
#         script never touches — corrected copy-paste leftover.)
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
# Partitions older than 4 days are unmounted at the end of the run.
expire_date=$(date +%Y%m%d -d "-4 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $today")

business='3s'
source='manual'
storeSource="store"

output_path="${DM_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
store_output_path="${DM_DEVICE_TAG_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${storeSource}/${business}"
input_path="${DM_INSTALL_LIST}_v2/${date_path}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

# Block until the upstream jobs have written their _SUCCESS markers.
check_await "${app_tag_path}/_SUCCESS"
check_await "${input_path}/_SUCCESS"

# Clear any previous output so reruns are idempotent.
# (`-rm -r` replaces the long-deprecated `-rmr`; paths quoted for safety.)
hadoop fs -rm -r "${output_path}"
hadoop fs -rm -r "${store_output_path}"

spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
  --conf spark.sql.shuffle.partitions=20 \
  --files "${HIVE_SITE_PATH}" \
  --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 4 \
  ../${JAR} \
  -date "$date" -manualOutput "${output_path}" -business "${business}" -storeOutput "${store_output_path}" -coalesce 20
if [ $? -ne 0 ]; then
  exit 255
fi

# Mount the freshly written partitions.
mount_partition "dmp_device_tag" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dmp_device_tag" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Write the success marker for the store output.
hadoop fs -touchz "${store_output_path}/_SUCCESS"

# Unmount expired partitions and drop their paths.
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=3s_tracking_install_daily
command=sh -x 3s_install_device_tag_daily.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :wangjf
# @revision:2019-03-27 17:01:50
# @desc : daily 3s tracking tags -> dm_device_tag_daily (manual/store partitions)
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
# Daily partitions are kept for 32 days.
expire_date=$(date +%Y%m%d -d "-32 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-32 day $today")

business='3s'
source='manual'
storeSource="store"

output_path="${DM_DEVICE_TAG_DAILY_PATH}/${date_path}/${source}/${business}"
store_output_path="${DM_DEVICE_TAG_DAILY_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DM_DEVICE_TAG_DAILY_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DM_DEVICE_TAG_DAILY_PATH}/${expire_date_path}/${storeSource}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

check_await "${app_tag_path}/_SUCCESS"

# Clear previous output (`-rm -r` replaces the deprecated `-rmr`).
hadoop fs -rm -r "${output_path}"
hadoop fs -rm -r "${store_output_path}"

spark-submit --class mobvista.dmp.datasource.tracking_3s.TrackingTagDaily \
  --name "mobvista.dmp.datasource.tracking_3s.TrackingTagDaily_wangjf_${date}" \
  --conf spark.sql.shuffle.partitions=5 \
  --conf spark.default.parallelism=5 \
  --conf spark.sql.files.maxPartitionBytes=268435456 \
  --files "${HIVE_SITE_PATH}" \
  --jars "${JARS}" \
  --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 3 \
  ../${JAR} \
  -date "${date}" -manualOutput "${output_path}" -business "${business}" -storeOutput "${store_output_path}" -coalesce 2
# FIX: the original used the bash-only [[ ]] test, but this file declares
# #!/bin/sh and is launched via `sh -x`; use the portable [ ] instead.
if [ $? -ne 0 ]; then
  exit 255
fi

# Mount the freshly written partitions.
mount_partition "dm_device_tag_daily" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dm_device_tag_daily" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Ensure the store output exists and carries a _SUCCESS marker even when
# the job produced no store records.
if hadoop fs -ls "${store_output_path}" > /dev/null 2>&1
then
  hadoop fs -touchz "${store_output_path}/_SUCCESS"
else
  hadoop fs -mkdir -p "${store_output_path}"
  hadoop fs -touchz "${store_output_path}/_SUCCESS"
fi

# Unmount expired partitions and drop their paths.
unmount_partition "dm_device_tag_daily" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dm_device_tag_daily" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=3s_tracking_install_total_orc
command=echo "3s job end!"
\ No newline at end of file
#!/bin/sh
# Match interest tags (ORC pipeline, dmp_device_tag_daily) for the day's
# 3s install list and publish manual/store partitions; expire 4-day-old ones.
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-4 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $today")

business='3s'
source='manual'
storeSource="store"

output_path="${DMP_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
store_output_path="${DMP_DEVICE_TAG_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DMP_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DMP_DEVICE_TAG_PATH}/${expire_date_path}/${storeSource}/${business}"
input_path="${DMP_INSTALL_LIST}/${date_path}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

# Block until upstream outputs are complete.
check_await "${app_tag_path}/_SUCCESS"
check_await "${input_path}/_SUCCESS"

# Clear previous output (`-rm -r` replaces the deprecated `-rmr`).
hadoop fs -rm -r "${output_path}"
hadoop fs -rm -r "${store_output_path}"

spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
  --name "MatchInterestTagDailyV2.${date}.${business}" \
  --conf spark.sql.shuffle.partitions=8 \
  --conf spark.default.parallelism=8 \
  --conf spark.sql.files.maxPartitionBytes=268435456 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --files "${HIVE_SITE_PATH}" \
  --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
  --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \
  ../${JAR} \
  -date "${date}" -manualOutput "${output_path}" -business "${business}" -storeOutput "${store_output_path}" -coalesce 4
# FIX: [[ ]] is a bashism; the script declares #!/bin/sh, so use [ ].
if [ $? -ne 0 ]; then
  exit 255
fi

# Mount the freshly written partitions.
mount_partition "dmp_device_tag_daily" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dmp_device_tag_daily" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Write the success marker for the store output.
hadoop fs -touchz "${store_output_path}/_SUCCESS"

# Unmount expired partitions and drop their paths.
unmount_partition "dmp_device_tag_daily" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dmp_device_tag_daily" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=3s_tracking_install_daily
command=sh -x 3s_install_user_info.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision: 2017-11-16
# @desc : add the day's 3s install users to the ods_dmp_user_info table
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

dt=$(date +"%Y%m%d" -d "-1 day $today")
date=$(date +"%Y-%m-%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
old_path=$(date +%Y/%m/%d -d "-2 day $today")
# user-info partitions are kept for 6 days
expire_date=$(date +%Y%m%d -d "-6 day $today")
expire_path=$(date +%Y/%m/%d -d "-6 day $today")

business_name="3s"

daily_path="${INSTALL_DAILY_3S}/$date_path"
age_path="${AGE_CALC_DEVICE}/$date_path"
gender_path="${GENDER_CALC_DEVICE}/$date_path"
old_total_path="${ODS_DMP_USER_INFO}/$old_path/${business_name}"
OUTPUT_PATH="${ODS_DMP_USER_INFO}/$date_path/${business_name}"
unmount_path="${ODS_DMP_USER_INFO}/$expire_path/${business_name}"

# Reducer count scales with input size; $(...) replaces legacy backticks.
coalesce=$(calculate_reduce_num "${old_total_path};${daily_path}")
coalesce=$(( coalesce * 5 ))

check_await "${old_total_path}/_SUCCESS"
# check_await "${age_path}/_SUCCESS"
# check_await "${gender_path}/_SUCCESS"

# userInfoJob parameters: LOG_TIME, dailyPath, dailyFormat, dailyDidIndex,
# dailyDidTypeIndex, dailyPltIndex, dailyCountryIndex, agePath, genderPath,
# totalPath, outputPath, coalesce, jar, exeNum, parallelism
userInfoJob "$date" "$daily_path" "text" "0" "1" "2" "5" "$age_path" "$gender_path" "$old_total_path" "$OUTPUT_PATH" ${coalesce} "../${JAR}" 80 400

mount_partition "ods_dmp_user_info" "dt='${dt}', business='${business_name}'" "$OUTPUT_PATH"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='${business_name}'" "$unmount_path"
\ No newline at end of file
type=command
dependencies=3s_tracking_interest_install,3s_install_user_info,3s_install_device_tag,3s_install_device_tag_daily,3s_tracking_install_total_orc
command=echo "3s job end!"
\ No newline at end of file
type=command
dependencies=dim_campaign_3s_list,bigmedia_domestic_launch_total
command=sh -x 3s_tracking_install_daily.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file :3s_tracking_install_daily.sh
# @author :liushuai
# @revision:2017-03-20 20:09
# Build the day's 3s install list from tracking logs, merge in domestic
# big-media installs via Hive, and republish etl_3s_install_daily.
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
dt_yesterday=$(date +%Y%m%d -d "-2 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
dt_today_dash=$(date +%Y-%m-%d -d "-2 day $ScheduleTime")
date_path="${year}/${month}/${day}"
old_path=$(date -d"$LOG_TIME -1 day " +"%Y/%m/%d")

CAMPAIGN_INPUT_PATH="${DIM_CAMPAIGN_PACKAGE_3S}/${year}/${month}-${day}/"
INPUT_PATH_3S="${PATH_3S}/$date_path/"
TMP_OUT_PATH="${TMP_INSTALL_DAILY_3S}/$date_path"
OUT_PATH="${INSTALL_DAILY_3S}/$date_path/"

# Wait for the last chunk of the day's tracking logs to land.
check_await "${INPUT_PATH_3S}/23/45/*"

# Clear the temp output (`-rm -r` replaces the deprecated `-rmr`).
hadoop fs -rm -r "${TMP_OUT_PATH}"

hadoop jar ../${JAR} mobvista.dmp.datasource.tracking.mapreduce.TrackingInstallDailyMR \
  -Dtask.date=${year}-${month}-${day} \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  "${CAMPAIGN_INPUT_PATH}" "${INPUT_PATH_3S}" "${TMP_OUT_PATH}"
if [ $? -ne 0 ];then
  exit 255
fi

# Resolve bundleId -> packageName.
# FIX: a failure here used to be silently ignored.
matchBundlePackage "$date_path" "$old_path" "2" "3" "$TMP_OUT_PATH" "$OUT_PATH" "3s" "../${JAR}" || exit 255

mount_partition "etl_3s_install_daily" "\`date\`='$LOG_TIME'" "$OUT_PATH" || exit 1

# Merge the domestic big-media installs into a temp table before
# republishing the day's data.
hive_cmd "set mapred.reduce.tasks = 15;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
insert overwrite table dwh.etl_3s_install_daily_tmp
select lower(device_id) device_id,device_type,platform,package_name,update_date,country from dwh.etl_3s_install_daily where \`date\` = '${LOG_TIME}'
union
select lower(device_id) device_id,device_type,platform,package_name, '${dt_today_dash}' update_date,'CN' country from dwh.ods_bigmedia_domestic_daily where dt = '${dt_yesterday}' and package_name !=''
" || exit 255
# ^ FIX: previously a failed hive_cmd was ignored and the output below was
#   still wiped, losing the day's data.

hadoop fs -rm -r "${OUT_PATH}"*
if [ $? -eq 0 ];then
  hadoop fs -cp s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/etl_3s_install_daily_tmp/* "$OUT_PATH"
fi
hadoop fs -touchz "${OUT_PATH}_SUCCESS"
type=command
dependencies=3s_tracking_install_daily
command=sh -x 3s_tracking_install_total.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file :3s_tracking_install_total.sh
# @author :liushuai
# @revision:2017-03-21 14:09
# Merge the day's 3s install records into the cumulative dm_install_list
# and mount the resulting year/month/day partition.
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

# Run date is one day behind the schedule time.
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

# Two days back: used to locate the most recent total snapshot.
YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}

INSTALL_DAILY_3S="${INSTALL_DAILY_3S}/${year}/${month}/${day}/"
INPUT_INSTALL_TOTAL_PATH="$(get_recently_dir "$DM_INSTALL_LIST" "$YESTERDAY" "3s")"
OUT_PATH="${DM_INSTALL_LIST}/${year}/${month}/${day}/3s"

# Reducer count is derived from the size of the previous total.
REDUCE_NUM=$(calculate_reduce_num "$INPUT_INSTALL_TOTAL_PATH")

check_await "${INPUT_INSTALL_TOTAL_PATH}/_SUCCESS"

hadoop fs -rm -r ${OUT_PATH}

if ! hadoop jar ../${JAR} mobvista.dmp.datasource.tracking.mapreduce.TrackingInstallTotalMR \
  -Dtask.date=${year}-${month}-${day} \
  -Dmapreduce.job.reduces=${REDUCE_NUM} \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  "${INSTALL_DAILY_3S}" "${INPUT_INSTALL_TOTAL_PATH}" "${OUT_PATH}"; then
  exit 255
fi

mount_partition "dm_install_list" "year='$year', month='$month', day='$day', business='3s'" "$OUT_PATH"
\ No newline at end of file
type=command
dependencies=3s_tracking_install_daily
command=sh -x 3s_tracking_install_total_orc.sh
\ No newline at end of file
#!/bin/bash
# Build the day's cumulative 3s install list (ORC layout, dmp_install_list)
# from the daily install output plus yesterday's total, then mount the new
# dt partition and expire the 4-day-old one.
source ../dmp_env.sh
# Run date is one day behind the schedule time; YESTERDAY is two days back
# (the previous total snapshot).
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}
business="3s"
DAILY_INPUT="${INSTALL_DAILY_3S}/${year}/${month}/${day}/"
check_await "${DAILY_INPUT}/_SUCCESS"
# Grace period after the _SUCCESS marker appears — presumably to let the
# storage listing settle; TODO confirm why 60s specifically.
sleep 60
INPUT_PATH="${DMP_INSTALL_LIST}/${old_year}/${old_month}/${old_day}/$business"
check_await "${INPUT_PATH}/_SUCCESS"
OUTPUT="${DMP_INSTALL_LIST}/${year}/${month}/${day}/${business}"
# Total-list partitions older than 4 days are expired below.
expire_date=$(date +%Y%m%d -d "-4 day $LOG_TIME")
expire_date_path=$(date +"%Y/%m/%d" -d "-4 day ${LOG_TIME}")
EXPIRE_OUTPUT_PATH="${DMP_INSTALL_LIST}/${expire_date_path}/${business}"
# --conf spark.shuffle.memoryFraction=0.4 \
# --conf spark.storage.memoryFraction=0.4 \
# --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
spark-submit --class mobvista.dmp.common.InstallListLogic \
--name "DmpInstallList.${business}.${LOG_TIME}" \
--conf spark.sql.shuffle.partitions=20 \
--conf spark.default.parallelism=20 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 10
if [[ $? -ne 0 ]];then
exit 255
fi
mount_partition "dmp_install_list" "dt='$LOG_TIME', business='$business'" "$OUTPUT"
# Unmount the expired partition and delete its path.
unmount_partition "dmp_install_list" "dt='${expire_date}', business='${business}'" "${EXPIRE_OUTPUT_PATH}"
\ No newline at end of file
type=command
dependencies=3s_tracking_install_total
command=sh -x 3s_tracking_install_total_v2.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision:2017-09-01
# @desc : convert the day's 3s dm_install_list (RCFile) into the _v2 layout
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")

INPUT_PATH="${DM_INSTALL_LIST}/$date_path/3s"
OUTPUT_PATH="${DM_INSTALL_LIST}_v2/$date_path/3s"

# FIX: marker path quoted so check_await always receives a single argument.
check_await "${INPUT_PATH}/_SUCCESS"

hadoop fs -rm -r "${OUTPUT_PATH}"

hadoop jar ../${JAR} mobvista.dmp.main.ParseInstallRCFile \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  "${INPUT_PATH}" "${OUTPUT_PATH}" 20
if [ $? -ne 0 ];then
  exit 255
fi

mount_partition "dm_install_list_v2" "dt='$LOG_TIME', business='3s'" "$OUTPUT_PATH"
if [ $? -ne 0 ];then
  exit 255
fi
\ No newline at end of file
type=command
dependencies=3s_tracking_install_total
command=sh -x 3s_tracking_interest_install.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : 3s_tracking_interest_install.sh
# @author: liushuai
# @date : 17-03-21
# @desc : distinct the 3s install devices against the app-tag dictionary
#         and publish the result as a dm_interest_tag partition
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

INPUT_INSTALL_PATH="${DM_INSTALL_LIST}/${year}/${month}/${day}/3s"
INPUT_APP_TAG_PATH="$APP_TAG_PATH/${year}/${month}/${day}/"
OUTPUT_PATH="${DM_INTEREST_PATH}/${year}/${month}/${day}/3s"

check_await "$INPUT_APP_TAG_PATH/_SUCCESS"

# FIX: REDUCE_NUM was computed via calculate_reduce_num, immediately
# overwritten with 20, and never passed to the MR job — both assignments
# were dead code and the calculate_reduce_num call cost an extra HDFS scan
# per run. Removed.

hadoop fs -rm -r "${OUTPUT_PATH}"

hadoop jar ../${JAR} mobvista.dmp.common.InterestDeviceDistinctMR \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  -Dmapreduce.map.memory.mb=4096 \
  -Dmapreduce.map.java.opts=-Xmx2458m \
  -Dmapreduce.reduce.memory.mb=4096 \
  -Dmapreduce.reduce.java.opts=-Xmx2458m \
  -Dmapred.max.split.size=536870912 \
  -Dmapred.min.split.size.per.node=536870912 \
  -Dmapred.min.split.size.per.rack=536870912 \
  "$INPUT_APP_TAG_PATH/part-r-00000" "$INPUT_INSTALL_PATH" "$OUTPUT_PATH" "3s install interest job"
if [ $? -ne 0 ];then
  exit 255
fi

mount_partition "dm_interest_tag" "year='$year', month='$month', day='$day', business='3s'" "$OUTPUT_PATH"

echo "[3s Install + Interest Tag Total End!]"
\ No newline at end of file
type=command
command=sh -x dim_campaign_3s_list.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : dim_campaign_3s_list.sh
# @author: liushuai
# @date : 17-03-24
# @desc : dump campaign id/platform/package_name from the portal MySQL and
#         publish it as a dim_campaign_package_3s partition
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

# SECURITY NOTE(review): database credentials are hardcoded here; they should
# be moved to a protected config/env source and rotated.
# FIX: write with '>' instead of '>>' so a retried run does not append
# duplicate rows onto a stale camp.txt; abort if the dump itself fails.
mysql -hreplica-portal-mysql-external.mobvista.com -udatacenter -pPr3j5KcQlM13 -P3306 -D mob_portal -e"
select id,platform,package_name from mob_camp_info;" > ./camp.txt || exit 255

OUTPUT_PATH="${DIM_CAMPAIGN_PACKAGE_3S}/${year}/${month}-${day}/"

# -p creates missing parent directories (e.g. at a year boundary).
hadoop fs -mkdir -p "${OUTPUT_PATH}"
hadoop fs -put ./camp.txt "${OUTPUT_PATH}"
rm -f ./camp.txt

mount_partition "dim_campaign_package_3s" "year='$year', \`date\`='$month-$day'" "$OUTPUT_PATH"
\ No newline at end of file
type=command
command=sh -x 3s_postback_daily.sh
\ No newline at end of file
#!/usr/bin/env bash
# Build etl_3s_postback_daily: join the day's 3s postback install/event logs
# with the device-id/md5 match table, write the dt partition and _SUCCESS.
source ../dmp_env.sh
today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
yesterday=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")
dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt_yesterday=$(date -d "$ScheduleTime 2 days ago" +"%Y%m%d")
echo ${today}
echo ${yesterday}
# The ': <quoted string>' below is a disabled block — an old "last Sunday"
# computation kept for reference; note the job now passes dt_yesterday as
# -last_sunday instead.
: '
#由日期获得上周日日期
week=`date -d "$today" +%w`
echo "week=$week"
if [ "$week" -eq "0" ];then
week=7 #若为周日,则表示为7
fi
if [ "$week" -eq "1" ];then
week=8 #若为周一,则表示为8,取上上周日数据
fi
last_sunday=$(date +%Y%m%d -d "-$week day $today")
echo "last_sunday=$last_sunday"
'
OUTPUT_PATH="${ETL_3S_POSTBACK_DAILY_PATH}/${today}"
# The hour-23 _SUCCESS markers signal that the whole day's logs have landed.
POSTBACK_INSTALL_3S="${POSTBACK_INSTALL_3S_PATH}/${today}/virginia/23/_SUCCESS"
POSTBACK_EVENT_3S="${POSTBACK_EVENT_3S_PATH}/${today}/virginia/23/_SUCCESS"
check_await "${POSTBACK_INSTALL_3S}"
check_await "${POSTBACK_EVENT_3S}"
check_await "${DEVICE_ID_MD5_MATCH_PATH}/${yesterday}/_SUCCESS"
# Clear previous output so reruns are idempotent.
hadoop fs -rm -r "$OUTPUT_PATH/"
spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackDaily \
--conf spark.network.timeout=720s \
--conf spark.default.parallelism=2000 \
--conf spark.sql.shuffle.partitions=1000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 100 \
../${JAR} -output ${OUTPUT_PATH} -coalesce 100 \
-today ${dt_today} -last_sunday ${dt_yesterday}
if [ $? -ne 0 ];then
exit 255
fi
mount_partition "etl_3s_postback_daily" "dt='${dt_today}'" "$OUTPUT_PATH"
hadoop fs -touchz ${OUTPUT_PATH}/_SUCCESS
\ No newline at end of file
type=command
retries=3
dependencies=3s_postback_install_list_v2
command=sh -x 3s_postback_device_tag.sh
\ No newline at end of file
#!/usr/bin/env bash
# Match interest tags for the day's allpb (3s postback) install list and
# publish manual/store partitions of dmp_device_tag; expire 4-day-old ones.
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-4 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $today")

source='manual'
business='allpb'
storeSource="store"

output_path="${DM_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
store_output_path="${DM_DEVICE_TAG_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${storeSource}/${business}"
input_path="${DM_INSTALL_LIST}_v2/${date_path}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

# Block until upstream outputs are complete.
check_await "${app_tag_path}/_SUCCESS"
check_await "${input_path}/_SUCCESS"

# Clear previous output (`-rm -r` replaces the deprecated `-rmr`).
hadoop fs -rm -r "${output_path}"
hadoop fs -rm -r "${store_output_path}"

spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.sql.shuffle.partitions=1000 \
  --files "${HIVE_SITE_PATH}" \
  --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 40 \
  ../${JAR} \
  -date "${date}" -manualOutput "${output_path}" -business "${business}" -storeOutput "${store_output_path}" -coalesce 1000
# Portable [ ] instead of [[ ]]: the job config launches this via `sh -x`.
if [ $? -ne 0 ]; then
  exit 255
fi

# Mount the freshly written partitions.
mount_partition "dmp_device_tag" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dmp_device_tag" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Write the success marker for the store output.
hadoop fs -touchz "${store_output_path}/_SUCCESS"

# Unmount expired partitions and drop their paths.
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=3s_postback_daily
command=sh -x 3s_postback_device_tag_daily.sh
\ No newline at end of file
#!/bin/sh
# Daily 3s postback (allpb) tags -> dm_device_tag_daily (manual/store
# partitions); daily partitions are kept for 32 days.
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-32 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-32 day $today")

business='allpb'
source='manual'
storeSource="store"

output_path="${DM_DEVICE_TAG_DAILY_PATH}/${date_path}/${source}/${business}"
store_output_path="${DM_DEVICE_TAG_DAILY_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DM_DEVICE_TAG_DAILY_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DM_DEVICE_TAG_DAILY_PATH}/${expire_date_path}/${storeSource}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

check_await "${app_tag_path}/_SUCCESS"

# Clear previous output (`-rm -r` replaces the deprecated `-rmr`).
hadoop fs -rm -r "${output_path}"
hadoop fs -rm -r "${store_output_path}"

spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackTagDaily \
  --name "PostBackTagDaily" \
  --conf spark.sql.shuffle.partitions=10 \
  --conf spark.default.parallelism=10 \
  --conf spark.sql.files.maxPartitionBytes=268435456 \
  --files "${HIVE_SITE_PATH}" \
  --jars "${JARS}" \
  --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
  ../${JAR} \
  -date "${date}" -manualOutput "${output_path}" -business "${business}" -storeOutput "${store_output_path}" -coalesce 5
# FIX: [[ ]] is a bashism; the script declares #!/bin/sh, so use [ ].
if [ $? -ne 0 ]; then
  exit 255
fi

# Mount the freshly written partitions.
mount_partition "dm_device_tag_daily" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dm_device_tag_daily" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Ensure the store output exists and carries a _SUCCESS marker even when
# the job produced no store records.
if hadoop fs -ls "${store_output_path}" > /dev/null 2>&1
then
  hadoop fs -touchz "${store_output_path}/_SUCCESS"
else
  hadoop fs -mkdir -p "${store_output_path}"
  hadoop fs -touchz "${store_output_path}/_SUCCESS"
fi

# Unmount expired partitions and drop their paths.
unmount_partition "dm_device_tag_daily" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dm_device_tag_daily" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=3s_postback_install_total_orc
command=echo "3s_postback_install_total_orc job end!"
\ No newline at end of file
#!/bin/sh
# Match interest tags (ORC pipeline, dmp_device_tag_daily) for the day's
# allpb install list and publish manual/store partitions; expire 4-day-old ones.
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-4 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $today")

business='allpb'
source='manual'
storeSource="store"

output_path="${DMP_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
store_output_path="${DMP_DEVICE_TAG_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DMP_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DMP_DEVICE_TAG_PATH}/${expire_date_path}/${storeSource}/${business}"
input_path="${DMP_INSTALL_LIST}/${date_path}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

# Block until upstream outputs are complete.
check_await "${app_tag_path}/_SUCCESS"
check_await "${input_path}/_SUCCESS"

# Clear previous output (`-rm -r` replaces the deprecated `-rmr`).
hadoop fs -rm -r "${output_path}"
hadoop fs -rm -r "${store_output_path}"

spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
  --name "MatchInterestTagDailyV2.${date}.${business}" \
  --conf spark.sql.shuffle.partitions=100 \
  --conf spark.default.parallelism=100 \
  --conf spark.sql.files.maxPartitionBytes=268435456 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --files "${HIVE_SITE_PATH}" \
  --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \
  ../${JAR} \
  -date "${date}" -manualOutput "${output_path}" -business "${business}" -storeOutput "${store_output_path}" -coalesce 40
# FIX: [[ ]] is a bashism; the script declares #!/bin/sh, so use [ ].
if [ $? -ne 0 ]; then
  exit 255
fi

# Mount the freshly written partitions.
mount_partition "dmp_device_tag_daily" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dmp_device_tag_daily" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Write the success marker for the store output.
hadoop fs -touchz "${store_output_path}/_SUCCESS"

# Unmount expired partitions and drop their paths.
unmount_partition "dmp_device_tag_daily" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dmp_device_tag_daily" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
retries=3
dependencies=3s_postback_daily
command=sh -x 3s_postback_install_list.sh
\ No newline at end of file
#!/bin/sh
# Merge the day's 3s postback output into the cumulative allpb install list
# (dm_install_list) and mount the new year/month/day partition.
source ../dmp_env.sh

# Date variants derived from the schedule time (one and two days back).
today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
yesterday=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")
dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt=$(date -d "$ScheduleTime 1 days ago" +"%Y-%m-%d")
dt_yesterday=$(date -d "$ScheduleTime 2 days ago" +"%Y%m%d")

printf '%s\n' "$today"
printf '%s\n' "$yesterday"

INPUT_PATH="${ETL_3S_POSTBACK_DAILY_PATH}/${today}"
OLD_INPUT_PATH="${DM_INSTALL_LIST}/${yesterday}/allpb"
OUTPUT_PATH="${DM_INSTALL_LIST}/${today}/allpb"

# Wait for today's daily output and yesterday's cumulative list.
check_await "$INPUT_PATH/_SUCCESS"
check_await "$OLD_INPUT_PATH/_SUCCESS"

# Clear previous output so reruns are idempotent.
hadoop fs -rm -r "$OUTPUT_PATH/"

if ! spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackInstallList \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism=1000 \
  --master yarn --deploy-mode cluster --name PostBackInstallList --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 20 \
  ../${JAR} -input ${INPUT_PATH} -oldInput ${OLD_INPUT_PATH} -output ${OUTPUT_PATH} -date ${dt} -parallelism 1000 -coalesce 400; then
  exit 255
fi

mount_partition "dm_install_list" "year='${dt_today:0:4}', month='${dt_today:4:2}', day='${dt_today:6:2}', business='allpb'" "$OUTPUT_PATH"
\ No newline at end of file
type=command
dependencies=3s_postback_install_list
command=sh -x 3s_postback_install_list_v2.sh
\ No newline at end of file
#!/bin/bash
# Convert the day's allpb dm_install_list (RCFile) into the _v2 layout and
# mount the dt partition.
source ../dmp_env.sh

LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")
business="allpb"

INPUT_PATH="${DM_INSTALL_LIST}/$date_path/${business}"
OUTPUT_PATH="${DM_INSTALL_LIST}_v2/$date_path/${business}"

check_await "${INPUT_PATH}/_SUCCESS"

hadoop fs -rm -r "${OUTPUT_PATH}"

# Reducer count scales with input size.
REDUCE_NUM=$(calculate_reduce_num "${INPUT_PATH}")

hadoop jar ../${JAR} mobvista.dmp.main.ParseInstallRCFile \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  "${INPUT_PATH}" "${OUTPUT_PATH}" "${REDUCE_NUM}"
# FIX: the MR job's exit status was never checked — the only $? test sat
# after mount_partition, so a failed job still mounted an empty/partial
# partition. Abort before mounting instead.
if [ $? -ne 0 ];then
  exit 255
fi

mount_partition "dm_install_list_v2" "dt='$LOG_TIME', business='${business}'" "$OUTPUT_PATH"
if [ $? -ne 0 ];then
  exit 255
fi
type=command
dependencies=3s_postback_daily
command=sh -x 3s_postback_install_total_orc.sh
\ No newline at end of file
#!/bin/bash
# Build the day's cumulative allpb install list (ORC layout, dmp_install_list)
# from the postback daily output plus yesterday's total, then mount the new
# dt partition and expire the 4-day-old one.
source ../dmp_env.sh
# Run date is one day behind the schedule time; YESTERDAY is two days back
# (the previous total snapshot).
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}
business="allpb"
DAILY_INPUT="${ETL_3S_POSTBACK_DAILY_PATH}/${year}/${month}/${day}/"
check_await "${DAILY_INPUT}/_SUCCESS"
# Grace period after the _SUCCESS marker appears — presumably to let the
# storage listing settle; TODO confirm why 60s specifically.
sleep 60
INPUT_PATH="${DMP_INSTALL_LIST}/${old_year}/${old_month}/${old_day}/$business"
check_await "${INPUT_PATH}/_SUCCESS"
OUTPUT="${DMP_INSTALL_LIST}/${year}/${month}/${day}/${business}"
# Total-list partitions older than 4 days are expired below.
expire_date=$(date +%Y%m%d -d "-4 day $LOG_TIME")
expire_date_path=$(date +"%Y/%m/%d" -d "-4 day ${LOG_TIME}")
EXPIRE_OUTPUT_PATH="${DMP_INSTALL_LIST}/${expire_date_path}/${business}"
# --conf spark.shuffle.memoryFraction=0.4 \
# --conf spark.storage.memoryFraction=0.4 \
# --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
spark-submit --class mobvista.dmp.common.InstallListLogic \
--name "DmpInstallList.${business}.${LOG_TIME}" \
--conf spark.sql.shuffle.partitions=1000 \
--conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 25 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200
if [[ $? -ne 0 ]];then
exit 255
fi
mount_partition "dmp_install_list" "dt='$LOG_TIME', business='$business'" "$OUTPUT"
# Unmount the expired partition and delete its path.
unmount_partition "dmp_install_list" "dt='${expire_date}', business='${business}'" "${EXPIRE_OUTPUT_PATH}"
\ No newline at end of file
type=command
dependencies=3s_postback_daily
command=sh -x 3s_postback_install_user_info.sh
\ No newline at end of file
#!/bin/sh
# Add the day's 3s postback (allpb) users to the ods_dmp_user_info table
# and expire the 6-day-old partition.
source ../dmp_env.sh

# Business date: scheduler-injected ScheduleTime, or first CLI argument.
today=${ScheduleTime:-$1}

dt=$(date +"%Y%m%d" -d "-1 day $today")
date=$(date +"%Y-%m-%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
old_path=$(date +%Y/%m/%d -d "-2 day $today")
expire_date=$(date +%Y%m%d -d "-6 day $today")
expire_path=$(date +%Y/%m/%d -d "-6 day $today")

business_name="allpb"

daily_path="${ETL_3S_POSTBACK_DAILY_PATH}/$date_path"
age_path="${AGE_CALC_DEVICE}/$date_path"
gender_path="${GENDER_CALC_DEVICE}/$date_path"
old_total_path="${ODS_DMP_USER_INFO}/$old_path/${business_name}"
OUTPUT_PATH="${ODS_DMP_USER_INFO}/$date_path/${business_name}"
unmount_path="${ODS_DMP_USER_INFO}/$expire_path/${business_name}"

# Reducer count scales with input size; $(...) replaces legacy backticks.
coalesce=$(calculate_reduce_num "${old_total_path};${daily_path}")
coalesce=$(( coalesce * 5 ))

check_await "${old_total_path}/_SUCCESS"

# userInfoJob args: LOG_TIME dailyPath dailyFormat didIdx didTypeIdx pltIdx
# countryIdx agePath genderPath totalPath outputPath coalesce jar exeNum parallelism
userInfoJob "$date" "$daily_path" "orc" "0" "1" "2" "4" "$age_path" "$gender_path" "$old_total_path" "$OUTPUT_PATH" ${coalesce} "../${JAR}" 20 10

mount_partition "ods_dmp_user_info" "dt='${dt}', business='${business_name}'" "$OUTPUT_PATH"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='${business_name}'" "$unmount_path"
\ No newline at end of file
type=command
retries=3
dependencies=3s_postback_install_list
command=sh -x 3s_postback_interest_tag.sh
\ No newline at end of file
#!/bin/bash
# Distinct the allpb (3s postback) install devices against the app-tag
# dictionary and publish the result as a dm_interest_tag partition.
source ../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
date=$(date +"%Y%m%d" -d "-1 day $ScheduleTime")
date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")

business='allpb'

INPUT_INSTALL_PATH="${DM_INSTALL_LIST}/$year/$month/$day/${business}"
INPUT_APP_TAG_PATH="${APP_TAG_PATH}/$year/$month/$day"
OUTPUT_PATH="${DM_INTEREST_PATH}/${year}/${month}/${day}/${business}"

check_await "${INPUT_APP_TAG_PATH}/_SUCCESS"

# Reducer count is derived from the install-list size.
REDUCE_NUM=$(calculate_reduce_num ${INPUT_INSTALL_PATH})

# Clear previous output so reruns are idempotent.
hadoop fs -rm -r ${OUTPUT_PATH}

if ! hadoop jar ../${JAR} mobvista.dmp.common.InterestDeviceDistinctMR \
  -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
  -Dmapreduce.map.memory.mb=4096 \
  -Dmapreduce.map.java.opts=-Xmx2458m \
  -Dmapreduce.reduce.memory.mb=4096 \
  -Dmapreduce.reduce.java.opts=-Xmx2458m \
  -Dmapred.max.split.size=536870912 \
  -Dmapred.min.split.size.per.node=536870912 \
  -Dmapred.min.split.size.per.rack=536870912 \
  "${INPUT_APP_TAG_PATH}/part-r-00000" "${INPUT_INSTALL_PATH}" "${OUTPUT_PATH}" "3s postback interest job"; then
  exit 255
fi

mount_partition "dm_interest_tag" "year='$year', month='$month', day='$day', business='${business}'" "$OUTPUT_PATH"

echo "[3s postback Interest Tag End!]"
type=command
retries=3
dependencies=3s_postback_device_tag,3s_postback_interest_tag,3s_postback_install_user_info,3s_postback_device_tag_daily,3s_postback_install_total_orc
command=echo "3s_postback job end!"
\ No newline at end of file
type=command
dependencies=merge_campaign_list
command=bash -x ad_server_package.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : ad_server_package.sh
# @author: rongpei
# @date : 17-04-27
# # # # # # # # # # # # # # # # # # # # # #
# Joins the day's ad-server notice logs (empty + non-empty) with the campaign
# dimension and mounts the result as an etl_adserver_install_daily partition.
source ../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
date_path="${year}/${month}/${day}"
old_path=$(date -d"$LOG_TIME -1 day " +"%Y/%m/%d")
INPUT_SERVER_EMPTY_ADN="s3://mob-ad/adn/adn_net/notice/ad_server_empty/$date_path/*/*"
INPUT_SERVER_ADN="s3://mob-ad/adn/adn_net/notice/ad_server/$date_path/*/*"
INPUT_CAMPAIGN="${DIM_ADN_CAMPAIGN}/$date_path"
TMP_CAMPAIGN_TAG_OUTPUT="${TMP_ADSERVER_PKG_TMP_PATH}/$date_path"
CAMPAIGN_TAG_OUTPUT="${TMP_ADSERVER_PACKAGE_PATH}/$date_path"
# NOTE(review): the job is currently disabled — this early exit skips
# everything below. Remove the 'exit 0' to re-enable it.
exit 0
check_await "s3://mob-ad/adn/adn_net/notice/ad_server_empty/$date_path/virginia/23"
check_await "s3://mob-ad/adn/adn_net/notice/ad_server/$date_path/virginia/23"
hadoop fs -rm "$TMP_CAMPAIGN_TAG_OUTPUT/*"
hadoop jar ../${JAR} mobvista.dmp.datasource.packagelist.mapreduce.AdServerPackage \
"$INPUT_SERVER_EMPTY_ADN" "$INPUT_SERVER_ADN" "$INPUT_CAMPAIGN" "$TMP_CAMPAIGN_TAG_OUTPUT" || exit 1
# Match packageName for each bundleId
matchBundlePackage "$date_path" "$old_path" "2" "3" "$TMP_CAMPAIGN_TAG_OUTPUT" "$CAMPAIGN_TAG_OUTPUT" "adserver" "../${JAR}"
if [ $? -ne 0 ];then
exit 255
fi
mount_partition "etl_adserver_install_daily" "\`date\`='$LOG_TIME'" "$CAMPAIGN_TAG_OUTPUT" || exit 1
remove_dir $TMP_CAMPAIGN_TAG_OUTPUT
echo "[ad_server_package End!]"
type=command
dependencies=adserver_interest_tag,adserver_install_list_v2
command=echo "adserver job end!"
\ No newline at end of file
type=command
dependencies=ad_server_package
command=bash -x adserver_install_list.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adserver_install_list.sh
# @author: rongpei
# @date : 17-04-27
# # # # # # # # # # # # # # # # # # # # # #
# Merges the day's adserver package list into the cumulative dm_install_list
# (today's daily + yesterday's total -> today's total partition).
source ../dmp_env.sh
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
old_date_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
TASK_DATE="$(date -d "$ScheduleTime 1 days ago" "+%Y-%m-%d")"
year=${date_path:0:4}
month=${date_path:5:2}
day=${date_path:8:2}
INPUT_TODAY_ADN="${TMP_ADSERVER_PACKAGE_PATH}/${date_path}"
INPUT_LOG_DATE_ADN="$DM_INSTALL_LIST/${old_date_path}/adserver"
CAMPAIGN_TAG_OUTPUT="$DM_INSTALL_LIST/$date_path/adserver"
# NOTE(review): the job is currently disabled — this early exit skips
# everything below. Remove the 'exit 0' to re-enable it.
exit 0
hadoop fs -rm -r "$CAMPAIGN_TAG_OUTPUT/"
REDUCE_NUM=$(calculate_reduce_num "${INPUT_TODAY_ADN};${INPUT_LOG_DATE_ADN}")
check_await "${INPUT_LOG_DATE_ADN}/_SUCCESS"
hadoop jar ../${JAR} mobvista.dmp.datasource.packagelist.mapreduce.MergePackageName \
-Dtask.date="$TASK_DATE" -Dmapreduce.job.reduces=${REDUCE_NUM} \
"$INPUT_TODAY_ADN" "$INPUT_LOG_DATE_ADN" "$CAMPAIGN_TAG_OUTPUT" || exit 1
mount_partition "dm_install_list" "year='$year', month='$month', day='$day', business='adserver'" "$CAMPAIGN_TAG_OUTPUT" || exit 1
echo "[merge_package_name End!]"
\ No newline at end of file
type=command
dependencies=adserver_install_list
command=bash -x adserver_install_list_v2.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision:2017-09-01
# # # # # # # # # # # # # # # # # # # # # #
# Converts the adserver dm_install_list RCFile output into the _v2 layout
# and mounts it as a dm_install_list_v2 partition.
source ../dmp_env.sh
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")
INPUT_PATH="${DM_INSTALL_LIST}/$date_path/adserver"
OUTPUT_PATH="${DM_INSTALL_LIST}_v2/$date_path/adserver"
# NOTE(review): the job is currently disabled — this early exit skips
# everything below. Remove the 'exit 0' to re-enable it.
exit 0
check_await $INPUT_PATH/_SUCCESS
hadoop fs -rm -r $OUTPUT_PATH
hadoop jar ../${JAR} mobvista.dmp.main.ParseInstallRCFile $INPUT_PATH $OUTPUT_PATH 100
if [ $? -ne 0 ];then
exit 255
fi
mount_partition "dm_install_list_v2" "dt='$LOG_TIME', business='adserver'" "$OUTPUT_PATH"
if [ $? -ne 0 ];then
exit 255
fi
\ No newline at end of file
type=command
dependencies=adserver_install_list
command=bash -x adserver_interest_tag.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adserver_interest_tag.sh
# @author: houying
# @date : 17-5-2
# # # # # # # # # # # # # # # # # # # # # #
# Joins the adserver install list with the app-tag dictionary and mounts the
# result as a dm_interest_tag partition.
source ../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
INPUT_INSTALL_PATH="$DM_INSTALL_LIST/$year/$month/$day/adserver"
INPUT_APP_TAG_PATH="$APP_TAG_PATH/$year/$month/$day"
OUTPUT_INTEREST_TAG="$DM_INTEREST_PATH/$year/$month/$day/adserver"
# NOTE(review): the job is currently disabled — this early exit skips
# everything below. Remove the 'exit 0' to re-enable it.
exit 0
check_await "$INPUT_APP_TAG_PATH/_SUCCESS"
REDUCE_NUM=$(calculate_reduce_num "${INPUT_INSTALL_PATH};${INPUT_APP_TAG_PATH}")
hadoop fs -rm -r ${OUTPUT_INTEREST_TAG}
# BUGFIX: the line continuation after 'parallelcopies=50' was missing, which
# split the hadoop invocation into two commands; '\' restored below.
hadoop jar ../${JAR} mobvista.dmp.common.InterestDeviceDistinctMR \
-Dmapreduce.job.reduces=${REDUCE_NUM} \
-Dmapreduce.fileoutputcommitter.algorithm.version=2 \
-Dmapreduce.map.memory.mb=4096 \
-Dmapreduce.map.java.opts=-Xmx2458m \
-Dmapreduce.reduce.memory.mb=4096 \
-Dmapreduce.reduce.java.opts=-Xmx2458m \
-Dmapreduce.reduce.shuffle.parallelcopies=50 \
-Dmapreduce.task.io.sort.factor=100 -Dmapreduce.task.io.sort.mb=512 \
"$INPUT_APP_TAG_PATH/part-r-00000" "$INPUT_INSTALL_PATH" "$OUTPUT_INTEREST_TAG" "adserver install interest job"
if [ $? -ne 0 ];then
exit 255
fi
mount_partition "dm_interest_tag" "year='$year', month='$month', day='$day', business='adserver'" "$OUTPUT_INTEREST_TAG" || exit 1
echo "[Ad Server + Interest Tag Total End!]"
\ No newline at end of file
type=command
command=sh -x merge_campaign_list.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : merge_package_name.sh
# @author: rongpei
# @date : 17-04-27
# @desc : Ops export the day's active campaign data from an RDBMS; this job
#         merges that daily dump into the cumulative campaign dimension.
#         (data contact: ops - Hu Jinbin)
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh

# Schedule day: location of the freshly exported dump.
TODAY=$(date -d "$ScheduleTime" "+%Y%m%d")
today_year=${TODAY:0:4}
today_month=${TODAY:4:2}
today_day=${TODAY:6:2}

# Two days back: previous cumulative snapshot used as the merge base.
BASE_DAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
year=${BASE_DAY:0:4}
month=${BASE_DAY:4:2}
day=${BASE_DAY:6:2}

# One day back: partition date of the merged output.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
yester_year=${LOG_TIME:0:4}
yester_month=${LOG_TIME:4:2}
yester_day=${LOG_TIME:6:2}

INPUT_DMP_DATA_ADN="$CAMPAIGN_LIST_SRC/$today_year/$today_month/$today_day"
INPUT_CAMPAIGN_ADN="$DIM_ADN_CAMPAIGN/$year/$month/$day"
CAMPAIGN_TAG_OUTPUT="$DIM_ADN_CAMPAIGN/$yester_year/$yester_month/$yester_day"

check_await $INPUT_DMP_DATA_ADN/_SUCCESS
hadoop fs -rm "$CAMPAIGN_TAG_OUTPUT/*"

# Legacy MapReduce implementation, kept for reference:
#   hadoop jar ../${JAR} mobvista.dmp.datasource.packagelist.mapreduce.MergeCampaignList \
#     -Dtask.date=${yester_year}-${yester_month}-${yester_day} \
#     "$INPUT_DMP_DATA_ADN" "$INPUT_CAMPAIGN_ADN" "$CAMPAIGN_TAG_OUTPUT" || exit 1

# Spark rewrite of the MapReduce job above.
spark-submit --class mobvista.dmp.datasource.packagelist.MergeCampaignList \
--conf spark.network.timeout=720s \
--conf spark.default.parallelism=100 \
--conf spark.sql.shuffle.partitions=100 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=2048 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 4 \
../${JAR} -input_dmp_data_adn ${INPUT_DMP_DATA_ADN} -input_campaign_adn ${INPUT_CAMPAIGN_ADN} -coalesce 20 \
-output ${CAMPAIGN_TAG_OUTPUT} -today ${yester_year}-${yester_month}-${yester_day} || exit 1

mount_partition "dim_adn_campaign" "year='$yester_year', month='$yester_month', day ='$yester_day'" "$CAMPAIGN_TAG_OUTPUT" || exit 1
echo "[# merge_package_name END!]"
type=command
retries=3
command=sh -x mds_dmp_address_daily_adn.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision: 2017-12-12
# @desc : Extract the day's ADN location (address) info.
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

today=${ScheduleTime:-$1}
dt=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +"%Y/%m/%d" -d "-1 day $today")

INPUT_PATH="${MDS_ADN_SDK_REQUEST_DAILY}/${date_path}"
OUTPUT_PATH="${MDS_DMP_ADDRESS_DAILY}/${date_path}/adn_request"

# Wait for yesterday's SDK-request extract, then clear any stale output.
check_await "${INPUT_PATH}/_SUCCESS"
hadoop fs -rm -r "${OUTPUT_PATH}"

spark-submit --class mobvista.dmp.datasource.address.AddressInfoTotal \
--conf spark.yarn.executor.memoryOverhead=1024 --conf spark.network.timeout=720s \
--conf spark.sql.shuffle.partitions=200 \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 40 \
../../${JAR} -input ${INPUT_PATH} -output ${OUTPUT_PATH} -dailyFormat "rcfile" -parallelism 200 -coalesce 20 \
-indices "0,2,3,5,6" || exit 255

mount_partition "mds_dmp_address_daily" "dt='$dt', business='adn_request_sdk'" "$OUTPUT_PATH" || exit 255
\ No newline at end of file
type=command
command=sh -x adn_device_ids_daily.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_device_ids_daily.sh
# @author: chao.wu
# @date : 16-12-02
# # # # # # # # # # # # # # # # # # # # # #
# Extracts device-id mappings from the day's parsed ADN request logs and
# mounts the result as an etl_ids_mapping partition (business=adn_request).
source ../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
# NOTE(review): YESTERDAY/old_* below are computed but never used.
YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}
INPUT_ADN_DSP_PATH="$ETL_ADN_ORG_REQ_HOURS/$year/$month/$day"
OUTPUT_PATH="$DMP_ADN_REQUEST_DEVICE_IDS/$year/$month/$day/adn_request"
## REDUCE_NUM=$(calculate_reduce_num "$INPUT_ADN_DSP_PATH")
# Clear stale output, then give the hadoop client JVM extra heap.
hadoop fs -rm -r ${OUTPUT_PATH}/*
export HADOOP_CLIENT_OPTS="-Xmx2096m $HADOOP_CLIENT_OPTS"
hadoop jar ../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnDeviceIdsMR \
-Dmapreduce.job.reduces=200 \
-Dmapreduce.map.memory.mb=4096 \
-Dmapreduce.map.java.opts=-Xmx2458m \
-Dmapreduce.reduce.memory.mb=4096 \
-Dmapreduce.reduce.java.opts=-Xmx2458m \
-Dtask.date="$year-$month-$day" \
-Dmapreduce.fileoutputcommitter.algorithm.version=2 \
"$INPUT_ADN_DSP_PATH" "$OUTPUT_PATH" || exit 1
mount_partition "etl_ids_mapping" "year='$year', month='$month', day='$day', business='adn_request' " "$OUTPUT_PATH"
echo "[Adn request ids daily Tag End!]"
type=command
retries=3
dependencies=adn_org_etl_hours_singapore,adn_org_etl_hours_frankfurt,adn_org_etl_hours_seoul,adn_org_etl_hours_virginia
command=echo "adn org etl job end!"
\ No newline at end of file
type=command
retries=3
command=bash -x adn_org_etl_hours_frankfurt.sh
\ No newline at end of file
#!/bin/bash
# Hourly ETL of raw ADN request logs for the frankfurt region: parse the
# target hour's logs and mount the result as an
# etl_adn_org_request_daily_hours partition.
source ../../dmp_env.sh
ScheduleTime=${ScheduleTime:-$1}
# All paths refer to the hour two hours before the schedule time.
input_date_path=$(date -d "2 hours ago $ScheduleTime" "+%Y/%m/%d")
yt=$(date -d "2 hours ago $ScheduleTime" "+%Y")
mt=$(date -d "2 hours ago $ScheduleTime" "+%m")
dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
# Wait until the raw logs for that hour have landed.
check_await "$ADN_REQUEST_PATH/$input_date_path/frankfurt/$hhpath/_SUCCESS"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/frankfurt/${hhpath}"
# Clear any output left by a previous (failed) run.
hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region frankfurt || exit 1
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='frankfurt',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"
hadoop fs -touchz $ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS
type=command
retries=3
command=bash -x adn_org_etl_hours_seoul.sh
\ No newline at end of file
#!/bin/bash
# Hourly ETL of raw ADN request logs for the seoul region: parse the
# target hour's logs and mount the result as an
# etl_adn_org_request_daily_hours partition.
source ../../dmp_env.sh
ScheduleTime=${ScheduleTime:-$1}
# All paths refer to the hour two hours before the schedule time.
input_date_path=$(date -d "2 hours ago $ScheduleTime" "+%Y/%m/%d")
yt=$(date -d "2 hours ago $ScheduleTime" "+%Y")
mt=$(date -d "2 hours ago $ScheduleTime" "+%m")
dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
# Wait until the raw logs for that hour have landed.
check_await "$ADN_REQUEST_PATH/$input_date_path/seoul/$hhpath/_SUCCESS"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/seoul/${hhpath}"
# Clear any output left by a previous (failed) run.
hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region seoul || exit 1
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='seoul',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"
hadoop fs -touchz $ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS
type=command
retries=3
command=bash -x adn_org_etl_hours_singapore.sh
\ No newline at end of file
#!/bin/bash
# Hourly ETL of raw ADN request logs for the singapore region: parse the
# target hour's logs and mount the result as an
# etl_adn_org_request_daily_hours partition. (Larger resources than the
# other regions: 10g executors, 50 of them, coalesce 400.)
source ../../dmp_env.sh
ScheduleTime=${ScheduleTime:-$1}
# All paths refer to the hour two hours before the schedule time.
input_date_path=$(date -d "2 hours ago $ScheduleTime" "+%Y/%m/%d")
yt=$(date -d "2 hours ago $ScheduleTime" "+%Y")
mt=$(date -d "2 hours ago $ScheduleTime" "+%m")
dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
# Wait until the raw logs for that hour have landed.
check_await "$ADN_REQUEST_PATH/$input_date_path/singapore/$hhpath/_SUCCESS"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/singapore/${hhpath}"
# Clear any output left by a previous (failed) run.
hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 50 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 400 -region singapore || exit 1
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='singapore',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"
hadoop fs -touchz $ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS
type=command
retries=3
command=bash -x adn_org_etl_hours_virginia.sh
\ No newline at end of file
#!/bin/bash
# Hourly ETL of raw ADN request logs for the virginia region: parse the
# target hour's logs and mount the result as an
# etl_adn_org_request_daily_hours partition.
source ../../dmp_env.sh
ScheduleTime=${ScheduleTime:-$1}
# All paths refer to the hour two hours before the schedule time.
input_date_path=$(date -d "2 hours ago $ScheduleTime" "+%Y/%m/%d")
yt=$(date -d "2 hours ago $ScheduleTime" "+%Y")
mt=$(date -d "2 hours ago $ScheduleTime" "+%m")
dt=$(date -d "2 hours ago $ScheduleTime" "+%d")
hhpath=$(date -d "2 hours ago $ScheduleTime" "+%H")
# Wait until the raw logs for that hour have landed.
check_await "$ADN_REQUEST_PATH/$input_date_path/virginia/$hhpath/_SUCCESS"
ETL_ADN_REQ_ORG_HOURS_PATH="${ETL_ADN_ORG_REQ_HOURS}/${input_date_path}/virginia/${hhpath}"
# Clear any output left by a previous (failed) run.
hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region virginia || exit 1
mount_partition "etl_adn_org_request_daily_hours" "yt='${yt}',mt='${mt}',dt='${dt}',rg='virginia',ht='${hhpath}'" "$ETL_ADN_REQ_ORG_HOURS_PATH"
hadoop fs -touchz $ETL_ADN_REQ_ORG_HOURS_PATH/_SUCCESS
type=command
retries=3
command=sh -x adn_request_device_total.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_request_device_total.sh
# @author: walt
# @date : 16-12-02
# # # # # # # # # # # # # # # # # # # # # #
# Merges the day's ADN request devices into the cumulative device total
# (daily input + yesterday's total -> today's ods_adn_device_total partition).
source ../../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}
INPUT_PATH_TOTAL="$ADN_TOTAL_PATH/${old_year}/${old_month}/${old_day}"
INPUT_PATH_DAILY="$ETL_ADN_ORG_REQ_HOURS/${year}/${month}/${day}"
OUTPUT_PATH="$ADN_TOTAL_PATH/${year}/${month}/${day}"
echo "INPUT_PATH_TOTAL=$INPUT_PATH_TOTAL"
echo "INPUT_PATH_DAILY=$INPUT_PATH_DAILY"
echo "OUTPUT_PATH=$OUTPUT_PATH"
## REDUCE_NUM=$(calculate_reduce_num ${INPUT_PATH_TOTAL})
# Clear stale output ('-rm' without -r removes files only, not subdirs),
# then give the hadoop client JVM extra heap.
hadoop fs -rm ${OUTPUT_PATH}/*
export HADOOP_CLIENT_OPTS="-Xmx4096m $HADOOP_CLIENT_OPTS"
hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnRequestDeviceTotalMR \
-Dtask.date=${LOG_TIME} \
-Dmapreduce.map.memory.mb=4096 \
-Dmapreduce.map.java.opts=-Xmx2458m \
-Dmapreduce.reduce.memory.mb=4096 \
-Dmapreduce.reduce.java.opts=-Xmx2458m \
-Dmapreduce.job.reduces=1000 \
-Dmapreduce.fileoutputcommitter.algorithm.version=2 \
"${INPUT_PATH_DAILY}" "${INPUT_PATH_TOTAL}" "${OUTPUT_PATH}" || exit 1
mount_partition "ods_adn_device_total" "year='${year}',month='${month}',day='${day}'" "$OUTPUT_PATH"
echo "[ADN device total End!]"
type=command
dependencies=mds_adn_request_daily
command=sh -x adn_request_user_info.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision: 2017-11-16
# @desc : Merge the day's adn_request devices into the ods_dmp_user_info
#         table (the previous header said "3s install" — copy-paste error).
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

today=${ScheduleTime:-$1}
dt=$(date +"%Y%m%d" -d "-1 day $today")
date=$(date +"%Y-%m-%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
old_path=$(date +%Y/%m/%d -d "-2 day $today")
expire_date=$(date +%Y%m%d -d "-6 day $today")
expire_path=$(date +%Y/%m/%d -d "-6 day $today")

business_name="adn_request"
daily_path="${MDS_ADN_SDK_REQUEST_DAILY}/$date_path"
age_path="${AGE_CALC_DEVICE}/$date_path"
gender_path="${GENDER_CALC_DEVICE}/$date_path"
old_total_path="${ODS_DMP_USER_INFO}/$old_path/${business_name}"
OUTPUT_PATH="${ODS_DMP_USER_INFO}/$date_path/${business_name}"
unmount_path="${ODS_DMP_USER_INFO}/$expire_path/${business_name}"

coalesce=$(calculate_reduce_num "${old_total_path};${daily_path}")

check_await "${old_total_path}/_SUCCESS"
# check_await "${age_path}/_SUCCESS"
# check_await "${gender_path}/_SUCCESS"

# userInfoJob parameters: LOG_TIME, dailyPath, dailyFormat, dailyDidIndex,
# dailyDidTypeIndex, dailyPltIndex, dailyCountryIndex, agePath, genderPath,
# totalPath, outputPath, coalesce, jar, exeNum, parallelism.
# Abort before touching Hive partitions if the merge job fails.
userInfoJob "$date" "$daily_path" "rcfile" "0" "1" "2" "6" "$age_path" "$gender_path" "$old_total_path" "$OUTPUT_PATH" $coalesce "../../${JAR}" 80 400 || exit 255

mount_partition "ods_dmp_user_info" "dt='${dt}', business='${business_name}'" "$OUTPUT_PATH"
mount_partition "ods_dmp_user_info" "dt='${dt}', business='adn_install'" "$OUTPUT_PATH"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='${business_name}'" "$unmount_path"
unmount_partition "ods_dmp_user_info" "dt='${expire_date}', business='adn_install'" "$unmount_path"
\ No newline at end of file
type=command
retries=3
command=sh -x adn_click_daily.sh
\ No newline at end of file
#!/usr/bin/env bash
# Extracts the day's ADN click events into ETL_ADN_CLICK_DAILY via Spark.
source ../../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
INPUT_ADN_INSTALL_PATH="${ADN_CLICK_PATH}/$year/$month/$day"
OUTPUT_ADN_INSTALL_DAILY="${ETL_ADN_CLICK_DAILY}/$year/$month/$day"
# NOTE(review): presumably virginia/23 is the last region/hour written for
# the day, so waiting on it means the full day has landed — TODO confirm.
check_await $INPUT_ADN_INSTALL_PATH/virginia/23/_SUCCESS
hadoop fs -rm -r "$OUTPUT_ADN_INSTALL_DAILY/"
spark-submit --class mobvista.dmp.datasource.adn.AdnClickDaily \
--name "AdnClickDaily.${LOG_TIME}" \
--conf spark.sql.shuffle.partitions=1000 \
--conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.kryoserializer.buffer=64m \
--master yarn --deploy-mode cluster \
--executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \
../.././DMP.jar \
-datetime ${LOG_TIME} -output ${OUTPUT_ADN_INSTALL_DAILY} -coalesce 200
if [[ $? -ne 0 ]]; then
exit 255
fi
# Legacy MapReduce implementation, disabled via the no-op ": '<string>'" idiom.
: '
hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnClickDailyMR \
-Dmapreduce.fileoutputcommitter.algorithm.version=2 \
"$INPUT_ADN_INSTALL_PATH" "$OUTPUT_ADN_INSTALL_DAILY" || exit 1
'
type=command
retries=3
dependencies=merge_adn_click_preclick,adn_install_daily
command=sh -x adn_install_click.sh
\ No newline at end of file
#!/usr/bin/env bash
# Joins the day's ADN install events with the merged click/pre-click stream
# and mounts the result as an etl_adn_install_daily partition. Input
# availability is handled by the scheduler dependencies
# (merge_adn_click_preclick, adn_install_daily) — no check_await here.
source ../../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}
INPUT_ADN_INSTALL_PATH="${ETL_ADN_INSTALL_TMP_DAILY}/$year/$month/$day/"
INPUT_CLICK_PATH="${ETL_ADN_MERGE_CLICK_PRE_CLICK}/$year/$month/$day"
OUTPUT_ADN_INSTALL_DAILY="${ETL_ADN_INSTALL_DAILY}/$year/$month/$day"
hadoop fs -rm -r "$OUTPUT_ADN_INSTALL_DAILY/"
spark-submit --class mobvista.dmp.datasource.adn.AdnClickJoinInstallDaily \
--name "AdnClickJoinInstallDaily.${LOG_TIME}" \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.default.parallelism=2000 \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.kryoserializer.buffer=64m \
--master yarn --deploy-mode cluster \
--executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 50 \
../.././DMP.jar \
-input_click ${INPUT_CLICK_PATH} -input_install ${INPUT_ADN_INSTALL_PATH} -output ${OUTPUT_ADN_INSTALL_DAILY} -coalesce 200
if [[ $? -ne 0 ]]; then
exit 255
fi
# Legacy MapReduce implementation, disabled via the no-op ": '<string>'" idiom.
: '
hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnClickJoinInstallJob \
-Dmapreduce.fileoutputcommitter.algorithm.version=2 \
"${INPUT_ADN_INSTALL_PATH}" "$INPUT_CLICK_PATH" "$OUTPUT_ADN_INSTALL_DAILY" || exit 1
'
mount_partition "etl_adn_install_daily" "\`date\`='$LOG_TIME'" "$OUTPUT_ADN_INSTALL_DAILY"
\ No newline at end of file
type=command
retries=3
command=sh -x adn_install_daily.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_install_daily.sh
# @author: houying
# @date : 16-11-1
# # # # # # # # # # # # # # # # # # # # # #
# Extracts the day's ADN install events into a temp dir, then resolves
# bundleIds to package names into ETL_ADN_INSTALL_TMP_DAILY.
source ../../dmp_env.sh
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
INPUT_CAMPAIGN_LIST_PATH="$DIM_ADN_CAMPAIGN/$date_path"
INPUT_ADN_INSTALL_PATH="${ADN_INSTALL_PATH}/$date_path"
TMP_OUTPUT_ADN_INSTALL_DAILY="${TMP_INSTALL_DAILY_ADN}/$date_path"
OUTPUT_ADN_INSTALL_DAILY="$ETL_ADN_INSTALL_TMP_DAILY/$date_path"
# NOTE(review): presumably virginia/23 is the day's final region/hour, so
# waiting on it means the full day's install logs have landed — TODO confirm.
check_await "$INPUT_ADN_INSTALL_PATH/virginia/23/_SUCCESS"
# NOTE(review): the campaign dimension is awaited here but only passed to
# the disabled MR variant below; the Spark job presumably reads it via Hive.
check_await "$INPUT_CAMPAIGN_LIST_PATH/_SUCCESS"
hadoop fs -rm -r "$TMP_OUTPUT_ADN_INSTALL_DAILY/*"
spark-submit --class mobvista.dmp.datasource.adn.AdnInstallDaily \
--name "AdnInstallDaily.${LOG_TIME}" \
--conf spark.sql.shuffle.partitions=100 \
--conf spark.default.parallelism=100 \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.kryoserializer.buffer=64m \
--master yarn --deploy-mode cluster \
--executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../.././DMP.jar \
-datetime ${LOG_TIME} -output ${TMP_OUTPUT_ADN_INSTALL_DAILY} -coalesce 20
if [[ $? -ne 0 ]]; then
exit 255
fi
# Legacy MapReduce implementation, disabled via the no-op ": '<string>'" idiom.
: '
hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnInstallDailyMR \
-Dmapreduce.fileoutputcommitter.algorithm.version=2 \
"$INPUT_CAMPAIGN_LIST_PATH" "$INPUT_ADN_INSTALL_PATH" "$TMP_OUTPUT_ADN_INSTALL_DAILY" || exit 1
'
# Match packageName for each bundleId
matchBundlePackage "$date_path" "$old_path" "2" "5" "$TMP_OUTPUT_ADN_INSTALL_DAILY" "$OUTPUT_ADN_INSTALL_DAILY" "adn_install" "../../${JAR}"
if [[ $? -ne 0 ]]; then
exit 255
fi
echo "[Adn Install Daily End!]"
type=command
dependencies=adn_install_total_v2
command=sh -x adn_install_device_tag.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision: 2018-04-19
# @desc : Match device tags under the new tag taxonomy for adn_install and
#         mount dmp_device_tag partitions (manual + store sources).
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh
today=${ScheduleTime:-$1}
date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-4 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $today")
source='manual'
business='adn_install'
storeSource="store"
output_path="${DM_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
store_output_path="${DM_DEVICE_TAG_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DM_DEVICE_TAG_PATH}/${expire_date_path}/${storeSource}/${business}"
input_path="${DM_INSTALL_LIST}_v2/${date_path}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"
check_await "${app_tag_path}/_SUCCESS"
check_await "${input_path}/_SUCCESS"
# Clear any stale output from a previous run.
hadoop fs -rmr ${output_path}
hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=20 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \
../../${JAR} \
-date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20
if [ $? -ne 0 ]; then
exit 255
fi
# Mount the day's partitions
mount_partition "dmp_device_tag" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dmp_device_tag" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"
# Write the success flag
hadoop fs -touchz ${store_output_path}/_SUCCESS
# Drop expired partitions and delete their paths
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dmp_device_tag" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
type=command
dependencies=adn_install_click
command=sh -x adn_install_device_tag_daily.sh
\ No newline at end of file
#!/bin/sh
# # # # # # # # # # # # # # # # # # # # # #
# @author :wangjf
# @revision:2019-03-27 17:01:50
# @desc : Build daily device tags for adn_install and mount the
#         dm_device_tag_daily partitions (manual + store sources).
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

today=${ScheduleTime:-$1}
date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-32 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-32 day $today")

business='adn_install'
source='manual'
storeSource="store"

output_path="${DM_DEVICE_TAG_DAILY_PATH}/${date_path}/${source}/${business}"
store_output_path="${DM_DEVICE_TAG_DAILY_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DM_DEVICE_TAG_DAILY_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DM_DEVICE_TAG_DAILY_PATH}/${expire_date_path}/${storeSource}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"

check_await "${app_tag_path}/_SUCCESS"

# Clear any stale output from a previous run.
hadoop fs -rmr ${output_path}
hadoop fs -rmr ${store_output_path}

spark-submit --class mobvista.dmp.datasource.adn.AdnInstallTagDaily \
--name "mobvista.dmp.datasource.adn.AdnInstallTagDaily_wangjf_${date}" \
--conf spark.sql.shuffle.partitions=50 \
--conf spark.default.parallelism=10 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 4 --num-executors 5 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1 || exit 255

# Mount the day's partitions.
mount_partition "dm_device_tag_daily" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dm_device_tag_daily" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"

# Ensure the store output dir exists, then write the success flag
# ('-mkdir -p' is a no-op when the directory is already there).
hadoop fs -mkdir -p ${store_output_path}
hadoop fs -touchz ${store_output_path}/_SUCCESS

# Drop the expired partitions and delete their paths.
unmount_partition "dm_device_tag_daily" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dm_device_tag_daily" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=adn_install_total_orc
command=echo "adn_install_total_orc job end!"
\ No newline at end of file
#!/bin/sh
# Daily device-tag job (MatchInterestTagDailyV2) for adn_install: tag the
# day's install list and mount dmp_device_tag_daily partitions
# (manual + store sources).
source ../../dmp_env.sh
today=${ScheduleTime:-$1}
date=$(date +"%Y%m%d" -d "-1 day $today")
date_path=$(date +%Y/%m/%d -d "-1 day $today")
expire_date=$(date +%Y%m%d -d "-4 day $today")
expire_date_path=$(date +%Y/%m/%d -d "-4 day $today")
business='adn_install'
source='manual'
storeSource="store"
output_path="${DMP_DEVICE_TAG_PATH}/${date_path}/${source}/${business}"
store_output_path="${DMP_DEVICE_TAG_PATH}/${date_path}/${storeSource}/${business}"
expire_path="${DMP_DEVICE_TAG_PATH}/${expire_date_path}/${source}/${business}"
expire_store_path="${DMP_DEVICE_TAG_PATH}/${expire_date_path}/${storeSource}/${business}"
input_path="${DMP_INSTALL_LIST}/${date_path}/${business}"
app_tag_path="${APP_TAG_PATH}/${date_path}"
check_await "${app_tag_path}/_SUCCESS"
check_await "${input_path}/_SUCCESS"
# Clear any stale output from a previous run.
hadoop fs -rmr ${output_path}
hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--name "MatchInterestTagDailyV2.${date}.${business}" \
--conf spark.sql.shuffle.partitions=8 \
--conf spark.default.parallelism=8 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4
if [[ $? -ne 0 ]]; then
exit 255
fi
# Mount the day's partitions
mount_partition "dmp_device_tag_daily" "dt='${date}', source='${source}', business='${business}'" "${output_path}"
mount_partition "dmp_device_tag_daily" "dt='${date}', source='${storeSource}', business='${business}'" "${store_output_path}"
# Write the success flag
hadoop fs -touchz ${store_output_path}/_SUCCESS
# Drop expired partitions and delete their paths
unmount_partition "dmp_device_tag_daily" "dt='${expire_date}', source='${source}', business='${business}'" "${expire_path}"
unmount_partition "dmp_device_tag_daily" "dt='${expire_date}', source='${storeSource}', business='${business}'" "${expire_store_path}"
\ No newline at end of file
type=command
dependencies=adn_interest_install,adn_install_device_tag,adn_install_device_tag_daily,adn_install_total_orc
command=echo "adn install finish"
\ No newline at end of file
type=command
retries=3
dependencies=adn_install_click
command=sh -x adn_install_total.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_install_total.sh
# @author: houying
# @date : 16-11-8
# @desc : merge yesterday's adn_install total with today's daily data into
#         the dm_install_list total for today, then mount the partition.
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}

INPUT_ADN_INSTALL_DAILY_PATH="$ETL_ADN_INSTALL_DAILY/$year/$month/$day"
INPUT_ADN_INSTALL_TOTAL_PATH="$DM_INSTALL_LIST/$old_year/$old_month/$old_day/adn_install"
OUTPUT="$DM_INSTALL_LIST/$year/$month/$day/adn_install"

# Only the previous total is awaited; the daily input is assumed ready via the
# scheduler dependency on adn_install_click — NOTE(review): confirm.
check_await "${INPUT_ADN_INSTALL_TOTAL_PATH}/_SUCCESS"

# Remove leftovers of a previous failed run.
hadoop fs -rm -r "$OUTPUT"

hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnInstallTotalMR \
    -Dtask.date=${year}-${month}-${day} \
    -Dmapreduce.job.reduces=100 \
    "$INPUT_ADN_INSTALL_DAILY_PATH" "$INPUT_ADN_INSTALL_TOTAL_PATH" "$OUTPUT" || exit 1

# Fail the job if mounting the partition fails (matches sibling scripts).
mount_partition "dm_install_list" "year='$year', month='$month', day='$day', business='adn_install'" "$OUTPUT" || exit 1

echo "[Adn Install Total End!]"
type=command
dependencies=adn_install_click
command=sh -x adn_install_total_orc.sh
\ No newline at end of file
#!/bin/bash
# Build the adn_install dmp_install_list total (ORC) for the day:
# wait for the daily ETL output and yesterday's total, run InstallListLogic,
# mount the new partition and expire the 4-day-old one.
source ../../dmp_env.sh

LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
old_year=${YESTERDAY:0:4}
old_month=${YESTERDAY:4:2}
old_day=${YESTERDAY:6:2}

business="adn_install"

DAILY_INPUT="${ETL_ADN_INSTALL_TMP_DAILY}/${year}/${month}/${day}/"
check_await "${DAILY_INPUT}/_SUCCESS"
# Grace period after _SUCCESS appears — presumably to let late files settle;
# NOTE(review): confirm whether this is still needed.
sleep 60

INPUT_PATH="${DMP_INSTALL_LIST}/${old_year}/${old_month}/${old_day}/${business}"
check_await "${INPUT_PATH}/_SUCCESS"

OUTPUT="${DMP_INSTALL_LIST}/${year}/${month}/${day}/${business}"
expire_date=$(date +%Y%m%d -d "-4 day $LOG_TIME")
expire_date_path=$(date +"%Y/%m/%d" -d "-4 day ${LOG_TIME}")
EXPIRE_OUTPUT_PATH="${DMP_INSTALL_LIST}/${expire_date_path}/${business}"

spark-submit --class mobvista.dmp.common.InstallListLogic \
    --name "DmpInstallList.${business}.${LOG_TIME}" \
    --conf spark.sql.shuffle.partitions=400 \
    --conf spark.default.parallelism=400 \
    --conf spark.kryoserializer.buffer.max=256m \
    --files "${HIVE_SITE_PATH}" \
    --jars "${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar" \
    --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 5 \
    ../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 100
if [[ $? -ne 0 ]];then
    exit 255
fi

# Fail the job if mounting the partition fails (previously unchecked).
mount_partition "dmp_install_list" "dt='$LOG_TIME', business='$business'" "$OUTPUT" || exit 255
# Drop the expired partition and delete its backing path.
unmount_partition "dmp_install_list" "dt='${expire_date}', business='${business}'" "${EXPIRE_OUTPUT_PATH}"
type=command
dependencies=adn_install_total
command=bash -x adn_install_total_v1.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_install_pkg_total_v1.sh
# @author : jinfeng.wang
# @time : 2020-05-22 16:06:07
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

# Resolve the processing day (yesterday relative to the scheduler time).
LOG_TIME=${ScheduleTime:-$1}
date=$(date +%Y%m%d -d "-1 day $LOG_TIME")
date_path=$(date +"%Y/%m/%d" -d "-1 day ${LOG_TIME}")
year=${date:0:4}
month=${date:4:2}
day=${date:6:2}

BUSINESS="adn_install"
INPUT_MAPPING="${RUID_MAPPING}/${date_path}"
OUTPUT_PATH="${DM_INSTALL_LIST}_v1/$date_path/${BUSINESS}"

# Block until the day's ruid mapping has been published.
check_await "${INPUT_MAPPING}/_SUCCESS"

# Rewrite the install list with ruid fixes applied; abort on failure.
spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \
 --name "FixInstallListRuid.${date}.${BUSINESS}" \
 --conf spark.sql.shuffle.partitions=1000 \
 --conf spark.default.parallelism=1000 \
 --conf spark.kryoserializer.buffer.max=512m \
 --conf spark.kryoserializer.buffer=64m \
 --conf spark.sql.adaptive.enabled=true \
 --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
 --files ${HIVE_SITE_PATH} \
 --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 40 \
 ../../${JAR} \
 -date ${date} -coalesce 200 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} || exit 255

# Register the freshly written path as today's partition.
mount_partition "dm_install_list_v1" "year='$year', month='$month', day='$day', business='${BUSINESS}'" "$OUTPUT_PATH" || exit 1

echo "[Adn Install Pkg Total V1 End!]"
type=command
retries=3
dependencies=adn_install_total
command=sh -x adn_install_total_v2.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @author :fengliang
# @revision:2017-09-01
# @desc : convert the day's adn_install dm_install_list into the _v2 layout
#         (ParseInstallRCFile MR job) and mount it as a dm_install_list_v2
#         partition.
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime")
date_path=$(date +%Y/%m/%d -d "-1 day $ScheduleTime")

INPUT_PATH="${DM_INSTALL_LIST}/$date_path/adn_install"
OUTPUT_PATH="${DM_INSTALL_LIST}_v2/$date_path/adn_install"

check_await "${INPUT_PATH}/_SUCCESS"

# Remove leftovers of a previous failed run.
hadoop fs -rm -r "${OUTPUT_PATH}"

REDUCE_NUM=$(calculate_reduce_num "${INPUT_PATH}")

# BUG FIX: check the MR job's exit status immediately. Previously a
# ": '...'" null-command (the commented-out Spark variant) sat between the
# job and the "$?" check, so the check always saw the status of ":" (0)
# and job failures were silently ignored.
hadoop jar ../../${JAR} mobvista.dmp.main.ParseInstallRCFile \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    "$INPUT_PATH" "$OUTPUT_PATH" "${REDUCE_NUM}" || exit 255

# Retired Spark implementation, kept for reference:
# business="adn_install"
# spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
#   --name "mobvista.dmp.datasource.dm.DmInstallListOrc.${business}" \
#   --conf spark.yarn.executor.memoryOverhead=2048 \
#   --conf spark.default.parallelism=20 \
#   --conf spark.speculation=true \
#   --conf spark.speculation.quantile=0.8 \
#   --conf spark.speculation.multiplier=1.2 \
#   --files ${HIVE_SITE_PATH} \
#   --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
#   --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 10 \
#   ../../${JAR} \
#   -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}

mount_partition "dm_install_list_v2" "dt='$LOG_TIME', business='adn_install'" "$OUTPUT_PATH" || exit 255
type=command
retries=3
dependencies=adn_install_total_v1
command=sh -x adn_interest_install.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_interest_install.sh
# @author: houying
# @date : 16-11-1
# @desc : join the day's adn_install list with the app tag dictionary to
#         produce distinct device interest tags, then mount the partition.
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

INPUT_INSTALL_PATH="${DM_INSTALL_LIST}_v1/$year/$month/$day/adn_install"
INPUT_APP_TAG_PATH="$APP_TAG_PATH/$year/$month/$day"
OUTPUT_INTEREST_TAG="$DM_INTEREST_PATH/$year/$month/$day/adn_install"

check_await "$INPUT_APP_TAG_PATH/_SUCCESS"

REDUCE_NUM=$(calculate_reduce_num "${INPUT_INSTALL_PATH}")

# Remove leftovers of a previous failed run.
hadoop fs -rm -r "${OUTPUT_INTEREST_TAG}"

hadoop jar ../../${JAR} mobvista.dmp.common.InterestDeviceDistinctMR \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    -Dmapreduce.map.memory.mb=4096 \
    -Dmapreduce.map.java.opts=-Xmx2458m \
    -Dmapreduce.reduce.memory.mb=4096 \
    -Dmapreduce.reduce.java.opts=-Xmx2458m \
    -Dmapreduce.job.reduces=${REDUCE_NUM} \
    "$INPUT_APP_TAG_PATH/part-r-00000" "$INPUT_INSTALL_PATH" "$OUTPUT_INTEREST_TAG" "adn install interest job" || exit 255

mount_partition "dm_interest_tag" "year='$year', month='$month', day='$day', business='adn_install'" "$OUTPUT_INTEREST_TAG" || exit 1

echo "[Adn Install + Interest Tag Total End!]"
type=command
retries=3
dependencies=adn_request_pkg_total_v1
command=sh -x adn_interest_request.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_interest_request.sh
#         (header previously said adn_interest_click.sh, but this script
#          tags the adn_request_sdk business and the job config invokes it
#          as adn_interest_request.sh)
# @author: houying
# @date : 16-10-17
# @desc : join the day's adn_request_sdk list with the app tag dictionary to
#         produce distinct device interest tags, then mount the partition.
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

check_await "$APP_TAG_PATH/$year/$month/$day/_SUCCESS"

INPUT_INSTALL_PATH="${DM_INSTALL_LIST}_v1/$year/$month/$day/adn_request_sdk"
INPUT_APP_TAG_PATH="$APP_TAG_PATH/${year}/${month}/${day}/"
OUTPUT="$DM_INTEREST_PATH/$year/$month/$day/adn_request_sdk"

# Remove leftovers of a previous failed run.
hadoop fs -rm -r "$OUTPUT"

REDUCE_NUM=$(calculate_reduce_num "$INPUT_INSTALL_PATH")

hadoop jar ../../${JAR} mobvista.dmp.common.InterestDeviceDistinctMR \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    -Dmapreduce.map.memory.mb=4096 \
    -Dmapreduce.map.java.opts=-Xmx2458m \
    -Dmapreduce.reduce.memory.mb=4096 \
    -Dmapreduce.reduce.java.opts=-Xmx2458m \
    -Dmapreduce.job.reduces=${REDUCE_NUM} \
    "$INPUT_APP_TAG_PATH/part-r-00000" "$INPUT_INSTALL_PATH" "$OUTPUT" "adn request interest job" || exit 255

mount_partition "dm_interest_tag" "year='$year', month='$month', day='$day', business='adn_request_sdk'" "$OUTPUT" || exit 1

echo "[Adn Tag Total End!]"
type=command
retries=3
dependencies=adn_interest_install,adn_interest_request,mds_adn_request_daily,adn_request_user_info,adn_install_device_tag,adn_request_device_tag,adn_request_device_tag_daily,adn_install_device_tag_daily,adn_install_total_orc
command=echo "Interest Tag Job End!"
\ No newline at end of file
type=command
retries=3
command=sh -x adn_pre_click_daily.sh
\ No newline at end of file
#!/usr/bin/env bash
# Extract the day's adn pre-click data into ETL_ADN_PRE_CLICK_DAILY via the
# AdnPreClickDaily Spark job.
source ../../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

INPUT_ADN_INSTALL_PATH="${ADN_PRE_CLICK_PATH}/$year/$month/$day"
OUTPUT_ADN_INSTALL_DAILY="${ETL_ADN_PRE_CLICK_DAILY}/$year/$month/$day"

# Wait for the last hour of the day to land (virginia/23 is the final bucket).
check_await "$INPUT_ADN_INSTALL_PATH/virginia/23/_SUCCESS"

# Remove leftovers of a previous failed run.
hadoop fs -rm -r "$OUTPUT_ADN_INSTALL_DAILY"

# Jar path fixed from "../.././DMP.jar" to the ${JAR} form used by every
# sibling script (and by the retired MR invocation below).
spark-submit --class mobvista.dmp.datasource.adn.AdnPreClickDaily \
    --name "AdnPreClickDaily.${LOG_TIME}" \
    --conf spark.sql.shuffle.partitions=1000 \
    --conf spark.default.parallelism=1000 \
    --conf spark.kryoserializer.buffer.max=512m \
    --conf spark.kryoserializer.buffer=64m \
    --master yarn --deploy-mode cluster \
    --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \
    ../../${JAR} \
    -datetime ${LOG_TIME} -output ${OUTPUT_ADN_INSTALL_DAILY} -coalesce 200
if [[ $? -ne 0 ]]; then
    exit 255
fi

# Retired MR implementation, kept for reference:
# hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnClickDailyMR \
#   -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
#   "$INPUT_ADN_INSTALL_PATH" "$OUTPUT_ADN_INSTALL_DAILY" || exit 1
type=command
dependencies=adn_request_daily_v2
command=sh -x adn_request_bundle_match.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_request_daily_v2.sh
# @author: wangjf
# @date : 2020-04-24 19:58:11
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

# Processing day and its neighbours, derived from the scheduler time.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
expire_date=$(date -d "$ScheduleTime 4 days ago" "+%Y%m%d")
expire_date_path=$(date -d "$ScheduleTime 4 days ago" "+%Y/%m/%d")

TMP_OUTPUT_PATH="${TMP_AND_REQUEST_SDK_DAILY_PATH}/$date_path"
OUTPUT="$ETL_ADN_REQUEST_SDK_DAILY/$date_path"
UNMATCH_DATA_PATH="${DEV_UNMATCH_DATA_PATH}/${date_path}/adn_request_sdk"
EXPIRE_OUTPUT_PATH="${DEV_UNMATCH_DATA_PATH}/${expire_date_path}/adn_request_sdk"

# Resolve package names for bundle ids; abort the job on failure.
if ! matchBundlePackageV2 "$date_path" "$old_path" "adn_request_sdk" "$TMP_OUTPUT_PATH" "$OUTPUT" "../../${JAR}" "$LOG_TIME"; then
    exit 255
fi

# Mount the matched daily output and the unmatched-leftovers path.
mount_partition "etl_adn_request_sdk_daily" "dt='$LOG_TIME'" "$OUTPUT" || exit 1
mount_partition "etl_adn_request_sdk_unmatch" "dt='$LOG_TIME'" "$UNMATCH_DATA_PATH" || exit 1

# Drop the expired unmatched partition and delete its backing path.
unmount_partition "etl_adn_request_sdk_unmatch" "dt='${expire_date}'" "${EXPIRE_OUTPUT_PATH}"

echo "[EtlAdnRequestSdkDaily End!]"
type=command
command=sh -x adn_request_daily.sh
\ No newline at end of file
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file : adn_request_daily.sh
# @author: walt
# @date : 16-12-01
# @desc : extract appid from ods_adn_trackingnew_request, match package_name,
#         and save the result to etl_adn_sdk_request_daily.
# # # # # # # # # # # # # # # # # # # # # #
source ../../dmp_env.sh

LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
task_date=$(date -d "$ScheduleTime 1 days ago" +"%Y-%m-%d")

INPUT_ADN_SDK_PKG_DAILY="$ETL_ADN_ORG_REQ_HOURS/${date_path}"
INPUT_MAPPING_PATH="$APP_ID_MAPPING/$date_path"
TMP_OUTPUT_PATH="${TMP_AND_REQUEST_DAILY_PATH}/$date_path"
OUTPUT="$ETL_ADN_SDK_REQUEST_DAILY/$date_path"

# Wait for the last hourly bucket of the day and the day's app-id mapping.
check_await "$INPUT_ADN_SDK_PKG_DAILY/virginia/23/_SUCCESS"
check_await "$INPUT_MAPPING_PATH/_SUCCESS"

# Remove leftovers of a previous failed run.
hadoop fs -rm -r "$TMP_OUTPUT_PATH"

## REDUCE_NUM=$(calculate_reduce_num "${INPUT_ADN_SDK_PKG_DAILY};${INPUT_MAPPING_PATH}")

# The MR client needs extra heap for job setup.
export HADOOP_CLIENT_OPTS="-Xmx2496m $HADOOP_CLIENT_OPTS"

hadoop jar ../../${JAR} mobvista.dmp.datasource.adn.mapreduce.AdnSdkRequestPkgDailyMR \
    -Dtask.date="${task_date}" \
    -Dmapreduce.map.memory.mb=4072 \
    -Dmapreduce.map.java.opts=-Xmx3458m \
    -Dmapreduce.reduce.memory.mb=4072 \
    -Dmapreduce.reduce.java.opts=-Xmx3458m \
    -Dmapreduce.job.reduces=300 \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    "$INPUT_ADN_SDK_PKG_DAILY" "$INPUT_MAPPING_PATH" "$TMP_OUTPUT_PATH" "${DIM_MANUAL_MAPPING}"
if [[ $? -ne 0 ]]; then
    exit 255
fi

# Resolve package names for bundle ids; previously this result was unchecked
# and a failed match could still mount the partition (the v2 script checks it).
matchBundlePackage "$date_path" "$old_path" "2" "4" "$TMP_OUTPUT_PATH" "$OUTPUT" "adn_request" "../../${JAR}" || exit 255

mount_partition "etl_adn_sdk_request_daily" "\`date\`='$LOG_TIME'" "$OUTPUT" || exit 1

echo "[Etl Adn Sdk request Daily End!]"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment