Commit e9ba0fb2 by fan.jiang

cn_good_channel

parent cdccac2d
type=command
command=sh -x cn_good_channel.sh
#!/usr/bin/env bash
# Stage 1: load the shared environment, derive the partition dates and make
# sure the daily target directory exists.
#
# Reads:  ScheduleTime (not set in this script; expected from the caller's
#         environment), TMP_EGGPLANTS_OUTPUT_PATH, ODS_OTHER_DEVICE_DAILY.
# Sets:   dt_today, dt_slash_today, dt_slash_yesterday.
# NOTE(review): check_await and the *_PATH variables are not defined in this
# script; presumably they come from dmp_env.sh - confirm.
source ../../dmp_env.sh

# "today" is ScheduleTime minus one day; "yesterday" is ScheduleTime minus two.
# dt_today is not referenced again in the visible script.
dt_today=$(date -d "${ScheduleTime} 1 days ago" +%Y%m%d)
dt_slash_today=$(date -d "${ScheduleTime} 1 days ago" +%Y/%m/%d)
dt_slash_yesterday=$(date -d "${ScheduleTime} 2 days ago" +%Y/%m/%d)

check_await "${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}"

# Create today's directory under ODS_OTHER_DEVICE_DAILY only when it is absent.
other_device_daily_dir="${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}"
if ! hadoop fs -test -e "${other_device_daily_dir}"; then
  hadoop fs -mkdir -p "${other_device_daily_dir}"
fi
# Stage 2: define the package names and HDFS/S3 locations used by the Spark
# job, then remove any output left over from a previous run of the same day.
#
# Reads: RTDMP_TMP_PACKAGE_NAME_PATH, dt_slash_today, dt_slash_yesterday.
# Sets:  PACKAGE_NAME01..03, OLD_DATA_PATH, OUTPUT_PATH1..5.
PACKAGE_NAME01="sdk_goodreq7"
PACKAGE_NAME02="sdk_wangzhuan_goodreq7"
PACKAGE_NAME03="adx_goodreq7"

# Shared prefixes: per-package output for today, and the "to_other" tree that
# is partitioned by date.
good_channel_today="${RTDMP_TMP_PACKAGE_NAME_PATH}/cn_good_channel/${dt_slash_today}"
to_other_root="${RTDMP_TMP_PACKAGE_NAME_PATH}/cn_good_channel_to_other"

# Previous day's "01" output, handed to the job as -old_data_path.
OLD_DATA_PATH="${to_other_root}/${dt_slash_yesterday}/01"

OUTPUT_PATH1="${good_channel_today}/${PACKAGE_NAME01}"
OUTPUT_PATH2="${good_channel_today}/${PACKAGE_NAME02}"
OUTPUT_PATH3="${good_channel_today}/${PACKAGE_NAME03}"
OUTPUT_PATH4="${to_other_root}/${dt_slash_today}/01"
OUTPUT_PATH5="${to_other_root}/${dt_slash_today}/02"

# Clear each output location in turn; a failed removal (for example, the path
# does not exist yet) is not treated as an error.
for stale_output in \
  "${OUTPUT_PATH1}" \
  "${OUTPUT_PATH2}" \
  "${OUTPUT_PATH3}" \
  "${OUTPUT_PATH4}" \
  "${OUTPUT_PATH5}"; do
  hadoop fs -rm -r "${stale_output}"
done
# Stage 3: submit the CnGoodChannel Spark job to YARN in cluster mode.
#
# Reads: HIVE_SITE_PATH, JAR, OUTPUT_PATH1..5, OLD_DATA_PATH,
#        PACKAGE_NAME01..03.
# Exits: 255 when spark-submit returns a non-zero status, so later stages never
#        run against missing or partial output.
#
# Every expansion is double-quoted so a value containing whitespace or glob
# characters is passed to spark-submit as exactly one argument.
if ! spark-submit --class mobvista.dmp.datasource.dm.CnGoodChannel \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.default.parallelism=3000 \
  --conf spark.sql.shuffle.partitions=3000 \
  --conf spark.driver.maxResultSize=4g \
  --conf spark.network.timeout=720s \
  --files "${HIVE_SITE_PATH}" \
  --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 100 \
  "../../${JAR}" -output1 "${OUTPUT_PATH1}" -output2 "${OUTPUT_PATH2}" -output3 "${OUTPUT_PATH3}" \
  -output4 "${OUTPUT_PATH4}" -output5 "${OUTPUT_PATH5}" -old_data_path "${OLD_DATA_PATH}" \
  -package_name1 "${PACKAGE_NAME01}" -package_name2 "${PACKAGE_NAME02}" -package_name3 "${PACKAGE_NAME03}" \
  -coalesce 600; then
  exit 255
fi
# Stage 4: publish the job output with distcp.
#
# Reads: OUTPUT_PATH4, OUTPUT_PATH5, TMP_EGGPLANTS_OUTPUT_PATH,
#        ODS_OTHER_DEVICE_DAILY, dt_slash_today.
# Exits: 255 when either copy fails, the same code used for a Spark failure.
#        Previously neither status was checked and the script ended on a no-op
#        `:` command, so it reported success even when nothing was copied.
#
# The trailing "/*" stays inside the quotes so the local shell passes the
# pattern to hadoop unexpanded.
if ! hadoop distcp -m20 "${OUTPUT_PATH4}/*" "${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}/"; then
  echo "distcp failed: ${OUTPUT_PATH4} -> ${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}/" >&2
  exit 255
fi
if ! hadoop distcp -m20 "${OUTPUT_PATH5}/*" "${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}/"; then
  echo "distcp failed: ${OUTPUT_PATH5} -> ${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}/" >&2
  exit 255
fi

# Job overview (translated from the author's original note):
# Per the requirements, the three SQL statements in CnGoodChannel.scala each
# extract a different data set, which is written to the S3 path of one of the
# three pseudo package names. Because the computation is heavy, each SQL
# statement reads a single day of data.
# After de-duplicating against the previous day's result, today's result is
# stored under output1, output2 and output3 for syncing to the large media
# partners.
# The data is also written to output4 and output5 and then loaded into the
# install list partition business="other".
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment