Commit 7890533e by WangJinfeng

fix rtdmp

parent 986ad516
...@@ -4,48 +4,35 @@ source ../dmp_env.sh ...@@ -4,48 +4,35 @@ source ../dmp_env.sh
today=${ScheduleTime} today=${ScheduleTime}
date_time=$(date +"%Y-%m-%d.%H" -d "-1 hour $today") date_time=$(date +"%Y%m%d%H" -d "-2 hour $today")
date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today") date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today")
part_num=$(hadoop fs -ls s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}/ | wc -l)
if [[ ${part_num} -le 50 ]]; then
echo "This Dir No Data !!!"
partition=10
coalesce=10
executor=2
memory=4
core=2
flag=0
else
partition=2000
coalesce=200
executor=8
memory=10
core=4
flag=1
fi
INPUT="s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}" INPUT="s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}"
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}" OUTPUT="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/audience_merge/${date_path}"
before_date_path=$(date +%Y/%m/%d/%H -d "-2 hour $today")
BEFORE_OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${before_date_path}"
check_await "${BEFORE_OUTPUT}/_SUCCESS"
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMain \ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMain \
--name "RTDmpMain.${date_time}" \ --name "RTDmpMain.${date_time}" \
--conf spark.sql.shuffle.partitions=${partition} \ --conf spark.sql.shuffle.partitions=1000 \
--conf spark.default.parallelism=${partition} \ --conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=512m \ --conf spark.kryoserializer.buffer.max=512m \
--conf spark.kryoserializer.buffer=64m \ --conf spark.kryoserializer.buffer=64m \
--master yarn --deploy-mode cluster \ --master yarn --deploy-mode cluster \
--executor-memory ${memory}g --driver-memory 6g --executor-cores ${core} --num-executors ${executor} \ --executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 40 \
.././DMP.jar \ .././DMP.jar \
-flag ${flag} -time ${date_time} -input ${INPUT} -output ${OUTPUT} -coalesce ${coalesce} -datetime ${date_time} -input ${INPUT} -output ${OUTPUT} -coalesce 200
if [[ $? -ne 0 ]]; then if [[ $? -ne 0 ]]; then
exit 255 exit 255
fi fi
mount_partition "audience_merge" "dt='${curr_time}'" "$OUTPUT"
expire_time=$(date +"%Y%m%d%H" -d "-24 hour $today")
expire_date_path=$(date +%Y/%m/%d/%H -d "-24 hour $today")
EXPIRE_OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/audience_merge/${expire_date_path}"
unmount_partition "audience_merge" "dt='${expire_time}'" "${EXPIRE_OUTPUT_PATH}"
\ No newline at end of file
...@@ -4,7 +4,7 @@ source ../dmp_env.sh ...@@ -4,7 +4,7 @@ source ../dmp_env.sh
today=${ScheduleTime:-$1} today=${ScheduleTime:-$1}
start_time=$(date +"%Y-%m-%d %H:00:00" -d "-24 hours $today") start_time=$(date +"%Y-%m-%d %H:00:00" -d "-168 hours $today")
end_time=$(date +"%Y-%m-%d %H:59:59" -d "-1 hours $today") end_time=$(date +"%Y-%m-%d %H:59:59" -d "-1 hours $today")
java -cp ../${JAR} mobvista.dmp.datasource.rtdmp.ServerMain "${start_time}" "${end_time}" java -cp ../${JAR} mobvista.dmp.datasource.rtdmp.ServerMain "${start_time}" "${end_time}"
......
type=command type=command
command=bash -x rtdmp_merge.sh dependencies=rtdmp_pre
\ No newline at end of file command=bash -x rtdmp.sh
\ No newline at end of file
...@@ -12,19 +12,17 @@ database="dwh" ...@@ -12,19 +12,17 @@ database="dwh"
table="audience_merge" table="audience_merge"
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMergeCK \ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMergeCK \
--name "RTDmpMergeCK.wangjf.${date_time}" \ --name "RTDmpMergeCK.wangjf.${date_time}" \
--conf spark.sql.shuffle.partitions=1000 \ --conf spark.sql.shuffle.partitions=1000 \
--conf spark.default.parallelism=1000 \ --conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=256m \ --conf spark.kryoserializer.buffer.max=256m \
--conf spark.speculation=false \ --conf spark.speculation=false \
--conf spark.speculation.quantile=0.9 \ --conf spark.speculation.quantile=0.9 \
--conf spark.speculation.multiplier=1.3 \ --conf spark.speculation.multiplier=1.3 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 6 --num-executors 10 \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ ../${JAR} -date_time "${date_time}" -host ${host} -cluster ${cluster} -database ${database} -table ${table}
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 6 \
../${JAR} -date_time "${date_time}" -host ${host} -cluster ${cluster} -database ${database} -table ${table}
if [[ $? -ne 0 ]]; then if [[ $? -ne 0 ]]; then
exit 255 exit 255
fi fi
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment