#!/usr/bin/env bash
source ../../dmp_env.sh


# Date partitions, all relative to ScheduleTime. ScheduleTime is not set in
# this file (presumably exported by the scheduler or dmp_env.sh -- TODO
# confirm); when it is empty GNU date falls back to the current time.
#   dt_today           - one day before ScheduleTime, formatted YYYYMMDD
#   dt_slash_today     - the same day, formatted YYYY/MM/DD
#   dt_slash_yesterday - two days before ScheduleTime, formatted YYYY/MM/DD
# Abort when date cannot evaluate the expression: an empty date segment would
# make the later "hadoop fs -rm -r" calls operate on malformed paths.
if ! dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d") ||
   ! dt_slash_today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d") ||
   ! dt_slash_yesterday=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d"); then
  echo "ERROR: cannot derive date partitions from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
fi

# check_await is not defined in this file (presumably provided by dmp_env.sh
# and presumably waits for the given path -- TODO confirm). It is invoked on
# the TMP_EGGPLANTS_OUTPUT_PATH directory for dt_slash_today.
check_await "${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}"

# Create the ODS_OTHER_DEVICE_DAILY directory for dt_slash_today unless the
# existence test already succeeds.
ods_daily_dir="${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}"
hadoop fs -test -e "${ods_daily_dir}" || hadoop fs -mkdir -p "${ods_daily_dir}"

# Pseudo package names handed to the Spark job; the first three output
# directories are named after them.
PACKAGE_NAME01="sdk_goodreq7"
PACKAGE_NAME02="sdk_wangzhuan_goodreq7"
PACKAGE_NAME03="adx_goodreq7"

# Shared roots under RTDMP_TMP_PACKAGE_NAME_PATH (not set in this file).
channel_root="${RTDMP_TMP_PACKAGE_NAME_PATH}/cn_good_channel"
to_other_root="${RTDMP_TMP_PACKAGE_NAME_PATH}/cn_good_channel_to_other"

# The "01" output written under dt_slash_yesterday, passed to the job as
# -old_data_path.
OLD_DATA_PATH="${to_other_root}/${dt_slash_yesterday}/01"

# Output locations for dt_slash_today.
OUTPUT_PATH1="${channel_root}/${dt_slash_today}/${PACKAGE_NAME01}"
OUTPUT_PATH2="${channel_root}/${dt_slash_today}/${PACKAGE_NAME02}"
OUTPUT_PATH3="${channel_root}/${dt_slash_today}/${PACKAGE_NAME03}"
OUTPUT_PATH4="${to_other_root}/${dt_slash_today}/01"
OUTPUT_PATH5="${to_other_root}/${dt_slash_today}/02"

# Remove whatever is already stored at each output location, in order.
for stale_path in \
  "${OUTPUT_PATH1}" \
  "${OUTPUT_PATH2}" \
  "${OUTPUT_PATH3}" \
  "${OUTPUT_PATH4}" \
  "${OUTPUT_PATH5}"; do
  hadoop fs -rm -r "${stale_path}"
done


# Submit the CnGoodChannel Spark job to YARN in cluster deploy mode.
# The job receives the five output paths, the "01" output path of the previous
# day (-old_data_path), the three pseudo package names and a coalesce value of
# 600. JAR is not set in this file (presumably exported by dmp_env.sh -- TODO
# confirm).
# Every expansion is quoted so that an empty or whitespace-containing value
# cannot shift the option/value pairing seen by the job.
# Any non-zero status from spark-submit aborts the script with exit code 255.
if ! spark-submit --class mobvista.dmp.datasource.dm.CnGoodChannel \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.default.parallelism=3000 \
  --conf spark.sql.shuffle.partitions=3000 \
  --conf spark.driver.maxResultSize=4g \
  --conf spark.network.timeout=720s \
  --master yarn --deploy-mode cluster \
  --executor-memory 8g --driver-memory 6g \
  --executor-cores 5 --num-executors 100 \
  "../../${JAR}" \
  -output1 "${OUTPUT_PATH1}" -output2 "${OUTPUT_PATH2}" -output3 "${OUTPUT_PATH3}" \
  -output4 "${OUTPUT_PATH4}" -output5 "${OUTPUT_PATH5}" \
  -old_data_path "${OLD_DATA_PATH}" \
  -package_name1 "${PACKAGE_NAME01}" \
  -package_name2 "${PACKAGE_NAME02}" \
  -package_name3 "${PACKAGE_NAME03}" \
  -coalesce 600; then
  echo "ERROR: spark-submit of CnGoodChannel failed" >&2
  exit 255
fi

# Copy the job output into the downstream daily directories:
#   OUTPUT_PATH4/* -> TMP_EGGPLANTS_OUTPUT_PATH/<dt_slash_today>/
#   OUTPUT_PATH5/* -> ODS_OTHER_DEVICE_DAILY/<dt_slash_today>/
# The "*" stays inside the quotes on purpose: it must reach hadoop unexpanded
# so that hadoop resolves it against the remote file system rather than the
# local shell expanding it.
# Each copy is checked explicitly; without the check a failed copy would go
# unnoticed because the last command of this file is a no-op that returns 0.
if ! hadoop distcp -m20 "${OUTPUT_PATH4}/*" "${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}/"; then
  echo "ERROR: distcp of ${OUTPUT_PATH4} failed" >&2
  exit 255
fi
if ! hadoop distcp -m20 "${OUTPUT_PATH5}/*" "${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}/"; then
  echo "ERROR: distcp of ${OUTPUT_PATH5} failed" >&2
  exit 255
fi


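: '
Per the requirements, the three SQL statements in CnGoodChannel.scala each extract a different data set, which is written to the S3 path of the corresponding pseudo package name. Because the computation is heavy, each SQL statement reads only a single day of data.
After de-duplication against the result data of the previous day, the results for today are stored in the output1, output2 and output3 paths, for synchronization to the major media platforms.
The data is also written to the output4 and output5 paths and then loaded into the business="other" partition of the install list.
2021-09-17 requirement update:
1. In the names of the output1, output2 and output3 paths, imeimd5 must be changed to imei_md5 and oaidmd5 must be changed to oaid_md5.
2. The imei is hashed with MD5 and written to imei_md5, then de-duplicated; the same applies to oaid.
'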