Commit 7890533e by WangJinfeng

fix rtdmp

parent 986ad516
......@@ -4,48 +4,35 @@ source ../dmp_env.sh
# Fragment of a job script: derives hour-granular dates and S3 paths from
# ScheduleTime, submits the RTDmpMain Spark job, then mounts the new
# audience_merge partition and unmounts the one from 24 hours earlier.
# NOTE(review): this text looks like a diff rendering in which removed and added
# lines are both present without +/- markers; several assignments and options
# therefore appear twice. Confirm against the real script before running.
today=${ScheduleTime}
# date_time is assigned twice; as written, the second assignment (compact
# %Y%m%d%H form, two hours before $today) overwrites the first.
date_time=$(date +"%Y-%m-%d.%H" -d "-1 hour $today")
date_time=$(date +"%Y%m%d%H" -d "-2 hour $today")
# Year/month/day/hour path component for one hour before $today.
date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today")
# Number of lines printed when listing the rtdmp_pre directory for that hour.
part_num=$(hadoop fs -ls s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}/ | wc -l)
# 50 lines or fewer is treated as "no data": small resource settings and flag=0.
# Otherwise the larger settings are chosen and flag=1.
if [[ ${part_num} -le 50 ]]; then
echo "This Dir No Data !!!"
partition=10
coalesce=10
executor=2
memory=4
core=2
flag=0
else
partition=2000
coalesce=200
executor=8
memory=10
core=4
flag=1
fi
INPUT="s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}"
# OUTPUT is assigned here and again below; the later audience_merge path is the
# value in effect when spark-submit runs.
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}"
before_date_path=$(date +%Y/%m/%d/%H -d "-2 hour $today")
BEFORE_OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${before_date_path}"
# check_await is not defined in the visible lines (presumably provided by the
# sourced dmp_env.sh; confirm). It is given the _SUCCESS marker path of the
# output from two hours before $today.
check_await "${BEFORE_OUTPUT}/_SUCCESS"
OUTPUT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/audience_merge/${date_path}"
# Submit the Spark job in yarn cluster mode.
# NOTE(review): shuffle.partitions, default.parallelism and the executor/driver
# resource options are each passed twice (variable-based and hard-coded).
# The "-flag ..." line has no trailing backslash, so the following
# "-datetime ..." line is not part of the spark-submit command as written.
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMain \
--name "RTDmpMain.${date_time}" \
--conf spark.sql.shuffle.partitions=${partition} \
--conf spark.default.parallelism=${partition} \
--conf spark.sql.shuffle.partitions=1000 \
--conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.kryoserializer.buffer=64m \
--master yarn --deploy-mode cluster \
--executor-memory ${memory}g --driver-memory 6g --executor-cores ${core} --num-executors ${executor} \
--executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 40 \
.././DMP.jar \
-flag ${flag} -time ${date_time} -input ${INPUT} -output ${OUTPUT} -coalesce ${coalesce}
-datetime ${date_time} -input ${INPUT} -output ${OUTPUT} -coalesce 200
# Abort with status 255 when the immediately preceding command failed.
if [[ $? -ne 0 ]]; then
exit 255
fi
# Register the freshly written output as a partition of audience_merge.
# NOTE(review): curr_time is not assigned anywhere in the visible lines; confirm
# it is set by dmp_env.sh or earlier in the script, otherwise dt='' is passed.
mount_partition "audience_merge" "dt='${curr_time}'" "$OUTPUT"
# Drop the partition (and its path) for the hour 24 hours before $today.
expire_time=$(date +"%Y%m%d%H" -d "-24 hour $today")
expire_date_path=$(date +%Y/%m/%d/%H -d "-24 hour $today")
EXPIRE_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/audience_merge/${expire_date_path}"
unmount_partition "audience_merge" "dt='${expire_time}'" "${EXPIRE_OUTPUT_PATH}"
\ No newline at end of file
......@@ -4,7 +4,7 @@ source ../dmp_env.sh
# Launches ServerMain over a time window computed relative to $today.
# $today comes from ScheduleTime, falling back to the first positional argument.
today=${ScheduleTime:-$1}
# start_time is assigned twice; as written, the second assignment (168 hours
# before $today, truncated to the top of the hour) overwrites the 24-hour one.
# NOTE(review): looks like the old and new lines of a diff; confirm which is live.
start_time=$(date +"%Y-%m-%d %H:00:00" -d "-24 hours $today")
start_time=$(date +"%Y-%m-%d %H:00:00" -d "-168 hours $today")
# End of the window: minute 59, second 59 of the hour one hour before $today.
end_time=$(date +"%Y-%m-%d %H:59:59" -d "-1 hours $today")
# JAR is not assigned in the visible lines (presumably set by dmp_env.sh; confirm).
java -cp ../${JAR} mobvista.dmp.datasource.rtdmp.ServerMain "${start_time}" "${end_time}"
......
# Job definition in key=value form: a command-type job and the shell script it runs.
# NOTE(review): two command= lines (rtdmp_merge.sh and rtdmp.sh) appear here along
# with one dependencies= line; this looks like the before/after sides of a diff.
# Confirm which command is current before relying on this text.
type=command
command=bash -x rtdmp_merge.sh
\ No newline at end of file
dependencies=rtdmp_pre
command=bash -x rtdmp.sh
\ No newline at end of file
......@@ -20,9 +20,7 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMergeCK \
--conf spark.speculation.quantile=0.9 \
--conf spark.speculation.multiplier=1.3 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 6 \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 6 --num-executors 10 \
../${JAR} -date_time "${date_time}" -host ${host} -cluster ${cluster} -database ${database} -table ${table}
if [[ $? -ne 0 ]]; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment