#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file : fix_dmp_install_list.sh
# @author : jinfeng.wang
# @time : 2020-05-22 16:06:07
# # # # # # # # # # # # # # # # # # # # # #
#
# Submits the Spark job mobvista.dmp.datasource.dm.FixInstallListRuid for the
# day before LOG_TIME and writes its output under
#   ${DM_INSTALL_LIST}_tmp/<yyyy>/<mm>/<dd>/${BUSINESS}
#
# Usage:  fix_dmp_install_list.sh [LOG_TIME]
# Inputs: ScheduleTime (env) - used as LOG_TIME when set and non-empty
#         $1                 - fallback LOG_TIME; any string GNU `date -d` parses.
#                              When both are empty, "-1 day" is resolved
#                              relative to the current time.
# Needs:  DM_INSTALL_LIST and JAR, which this script never assigns itself;
#         they are expected to come from ../dmp_env.sh (or the environment).
# Exit:   0 on success, 255 on any failure.
#
# NOTE: ../dmp_env.sh and ../${JAR} are resolved against the current working
# directory, not against the location of this script.

# Fail early when the environment file is missing; otherwise DM_INSTALL_LIST
# and JAR would silently stay empty and the job would be submitted with a
# bogus output path and jar.
if [[ ! -r ../dmp_env.sh ]]; then
  printf 'fix_dmp_install_list: cannot read ../dmp_env.sh\n' >&2
  exit 255
fi
# shellcheck disable=SC1091 -- file lives outside this script's directory
source ../dmp_env.sh

# Print a message to stderr and stop with the exit code used for job failures.
abort_job() {
  printf 'fix_dmp_install_list: %s\n' "$*" >&2
  exit 255
}

[[ -n "${DM_INSTALL_LIST}" ]] || abort_job "DM_INSTALL_LIST is not set"
[[ -n "${JAR}" ]] || abort_job "JAR is not set"

LOG_TIME=${ScheduleTime:-$1}

# Day to process: the day before LOG_TIME, as yyyymmdd.
target_date=$(date +%Y%m%d -d "-1 day ${LOG_TIME}") \
  || abort_job "cannot parse LOG_TIME: '${LOG_TIME}'"

# Same day as yyyy/mm/dd. Derived from target_date instead of calling `date`
# a second time so both values always describe the same day.
date_path="${target_date:0:4}/${target_date:4:2}/${target_date:6:2}"

BUSINESS="adn_request_sdk"
INPUT_MAPPING="s3://mob-emr-test/wangjf/data/ruid_result"
OUTPUT_PATH="${DM_INSTALL_LIST}_tmp/${date_path}/${BUSINESS}"

# Arguments are kept in an array so every value reaches spark-submit as
# exactly one word, regardless of its content.
submit_args=(
  --class mobvista.dmp.datasource.dm.FixInstallListRuid
  --name "FixInstallListRuid.${target_date}.${BUSINESS}"
  --conf spark.sql.shuffle.partitions=4000
  --conf spark.default.parallelism=4000
  --conf spark.kryoserializer.buffer.max=512m
  --conf spark.kryoserializer.buffer=64m
  --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC"
  --conf spark.sql.adaptive.enabled=true
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912
  --master yarn
  --deploy-mode cluster
  --executor-memory 10g
  --driver-memory 4g
  --executor-cores 5
  --num-executors 100
  "../${JAR}"
  # Everything after the jar is passed to the application itself.
  -date "${target_date}"
  -coalesce 1000
  -output "${OUTPUT_PATH}"
  -business "${BUSINESS}"
  -input "${INPUT_MAPPING}"
)

if ! spark-submit "${submit_args[@]}"; then
  exit 255
fi

# Disabled commands kept for reference only; nothing below is executed.
# For each business they delete the contents of the dm_install_list partition
# and then copy the matching dm_install_list_tmp partition over it.
#
# hdfs dfs -rm -r s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/iqiyi_api/*
# hadoop distcp -m 200 -overwrite s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list_tmp/2021/03/26/iqiyi_api/* s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/iqiyi_api/
#
# hdfs dfs -rm -r s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/joypacios/*
# hadoop distcp -m 200 -overwrite s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list_tmp/2021/03/26/joypacios/* s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/joypacios/
#
# hdfs dfs -rm -r s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/dsp_req/*
# hadoop distcp -m 200 -overwrite s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list_tmp/2021/03/26/dsp_req/* s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/dsp_req/
#
# hdfs dfs -rm -r s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/mopub_dealerid/*
# hadoop distcp -m 200 -overwrite s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list_tmp/2021/03/26/mopub_dealerid/* s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/mopub_dealerid/
#
# hdfs dfs -rm -r s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/mp/*
# hadoop distcp -m 200 -overwrite s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list_tmp/2021/03/26/mp/* s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2021/03/26/mp/