Commit 48220007 by fan.jiang

Temporarily export the dsp_req partition data as an audience package to S3 for product use

parent 3fe9507c
type=command
command=sh -x ./tmp_extract_data_from_dsp_req.sh
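
The type=command / command entries read like a scheduler job definition that simply shells out to the script added below. For a manual run, the schedule time can be supplied through the environment or as the first positional argument; the date below is only an illustrative placeholder, and the exact format injected by the scheduler may differ.

# Manual invocation (hypothetical date value; the scheduler normally sets ScheduleTime itself):
ScheduleTime="2021-06-01" sh -x ./tmp_extract_data_from_dsp_req.sh
# Equivalent, passing the date as the first positional argument:
sh -x ./tmp_extract_data_from_dsp_req.sh "2021-06-01"
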
#!/bin/bash
# Temporary job: extract the dsp_req partition data as an audience package and write it to S3 for product use.
source ../dmp_env.sh

ScheduleTime=${ScheduleTime:-$1}
# Derived date variables; only date_path is referenced further down in this script.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y-%m-%d")
dt=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
rm_dt=$(date -d "$ScheduleTime 180 days ago" "+%Y%m%d")
rm_dt_path=$(date -d "$ScheduleTime 180 days ago" "+%Y/%m/%d")

# Temporary S3 output path for the extracted audience package.
Tmp_Extract_Data_From_DspReq_Path="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/rtdmp_tmp_extract_data_from_dspReq_path"
# All hourly partitions of the upstream dsp_req ETL output for the target day.
ETL_DSP_REQ_ETL_HOURS_INPUT_PATH="${ETL_DSP_REQ_ETL_HOURS}/$date_path/*/*"

# Block until the last hourly partition (hour 23) of the upstream ETL has landed.
check_await "${ETL_DSP_REQ_ETL_HOURS}/$date_path/23/_SUCCESS"

# Remove any previous output so a re-run starts from a clean path (the script continues if it does not exist yet).
hadoop fs -rm -r "${Tmp_Extract_Data_From_DspReq_Path}"

spark-submit --class mobvista.dmp.datasource.dsp.TmpExtractDataFromDspReq \
  --conf spark.yarn.executor.memoryOverhead=3072 \
  --conf spark.sql.shuffle.partitions=10000 \
  --files ${HIVE_SITE_PATH} \
  --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 100 \
  ../${JAR} -input ${ETL_DSP_REQ_ETL_HOURS_INPUT_PATH} \
  -output ${Tmp_Extract_Data_From_DspReq_Path} \
  -coalesce 200 || exit 1
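
The script relies on check_await, which comes from ../dmp_env.sh and is not part of this commit, so its exact behavior is not visible here. As a rough, hypothetical sketch of such a helper, assuming it simply polls for the given _SUCCESS marker:

# Hypothetical sketch only; the real check_await in dmp_env.sh may differ.
check_await() {
  local marker="$1"
  # Poll every 5 minutes until the marker file exists on HDFS/S3.
  while ! hadoop fs -test -e "$marker"; do
    echo "$(date '+%F %T') waiting for $marker ..."
    sleep 300
  done
}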