#!/usr/bin/env bash
#
# Runs the ComReyunPracticaltool Spark job and publishes its two outputs.
#
# Task description (translated from the original note):
#   Operations needs, for the tags that Reyun provides to us, to pull the
#   devices carrying a given tag and load them into storage under a pseudo
#   package name.
#
#   Run the following SQL:
#     select device_id
#     from dwh.device_tag_weight_event_all_weekly
#     where dt='20210922'
#       and id_type in ('oaid','imei')
#       and tag_code = '0414'   -- tag_code must be changed per audience
#     group by device_id
#
#   Mapping between tag_code and audience-package name. There are 5 in
#   total; each one needs its own tag_code filter:
#     0414   - com.reyun_practicaltool
#     041404 - com.reyun_clean
#     040507 - com.reyun_smalltool
#     041403 - com.reyun_wifi
#     041406 - com.reyun_security
#
# Required from the environment / dmp_env.sh (not defined in this file):
#   ScheduleTime, JAR, TMP_EGGPLANTS_OUTPUT_PATH, ODS_OTHER_DEVICE_DAILY,
#   TMP_COM_REYUN_PRACTICALTOOL_PATH, and the check_await helper.
#
# Exit status: 255 if the Spark job or either distcp copy fails, else 0.

source ../../dmp_env.sh

# Despite the variable name, this is the day *before* ScheduleTime,
# formatted as YYYY/MM/DD.
dt_slash_today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")

# check_await is defined outside this file (presumably by dmp_env.sh);
# NOTE(review): it appears to block until the given path is ready - confirm.
check_await "${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}"

# Make sure the second distcp target directory exists.
if ! hadoop fs -test -e "${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}"; then
  hadoop fs -mkdir -p "${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}"
fi

OUTPUT_PATH01="${TMP_COM_REYUN_PRACTICALTOOL_PATH}/${dt_slash_today}/01"
OUTPUT_PATH02="${TMP_COM_REYUN_PRACTICALTOOL_PATH}/${dt_slash_today}/02"

# Clear any previous run's output. -f keeps this quiet when the path does
# not exist yet (e.g. the first run for this date).
hadoop fs -rm -r -f "${OUTPUT_PATH01}"
hadoop fs -rm -r -f "${OUTPUT_PATH02}"

if ! spark-submit --class mobvista.dmp.datasource.dm.ComReyunPracticaltool \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.default.parallelism=2000 \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.driver.maxResultSize=4g \
  --conf spark.network.timeout=720s \
  --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 5 --num-executors 10 \
  "../../${JAR}" -output01 "${OUTPUT_PATH01}" -output02 "${OUTPUT_PATH02}" -coalesce 400; then
  exit 255
fi

# Publish both outputs. The glob is quoted on purpose: it must reach Hadoop
# unexpanded so that Hadoop, not the local shell, resolves it.
# A failed copy now fails the job instead of being silently ignored.
if ! hadoop distcp -m20 "${OUTPUT_PATH01}/*" "${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}/"; then
  echo "distcp failed: ${OUTPUT_PATH01} -> ${TMP_EGGPLANTS_OUTPUT_PATH}/${dt_slash_today}/" >&2
  exit 255
fi

if ! hadoop distcp -m20 "${OUTPUT_PATH02}/*" "${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}/"; then
  echo "distcp failed: ${OUTPUT_PATH02} -> ${ODS_OTHER_DEVICE_DAILY}/${dt_slash_today}/" >&2
  exit 255
fi