Commit 15972dc8 by wang-jinfeng

optimize dmp

parent fd0560d0
......@@ -4,26 +4,48 @@ source ../dmp_env.sh
# Derive the hour to process: every value below is ${ScheduleTime} shifted back
# by one hour. ScheduleTime is not set in the visible lines (presumably supplied
# by the scheduler or the sourced environment file -- confirm).
today=${ScheduleTime}
date_time=$(date +"%Y-%m-%d %H" -d "-1 hour $today")
# NOTE(review): date_time is assigned twice in a row. As written, this second,
# space-free "YYYY-mm-dd.HH" value is the one later commands see. The two lines
# look like the before/after sides of a diff -- confirm which one is intended.
date_time=$(date +"%Y-%m-%d.%H" -d "-1 hour $today")
# Same hour rendered as a Y/m/d/H path fragment, used to build the S3 prefixes.
date_path=$(date +%Y/%m/%d/%H -d "-1 hour $today")
# Count the lines that `hadoop fs -ls` prints for this hour's input prefix and
# use that count as a rough size signal for choosing Spark resources.
# NOTE(review): this counts output lines, not files; the listing may include a
# summary header line, so the effective file threshold may be off by one -- verify.
part_num=$(hadoop fs -ls s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}/ | wc -l)
# 50 lines or fewer: treat the hour as (nearly) empty and use the small profile.
# The variables set here feed the RTDmpMain spark-submit below:
#   partition -> spark.sql.shuffle.partitions / spark.default.parallelism
#   coalesce  -> -coalesce job argument
#   executor  -> --num-executors
#   memory    -> --executor-memory (suffixed with "g" at the call site)
#   core      -> --executor-cores
#   flag      -> -flag job argument (0 on this branch, 1 otherwise)
if [[ ${part_num} -le 50 ]]; then
echo "This Dir No Data !!!"
partition=10
coalesce=10
executor=2
memory=4
core=2
flag=0
else
# More than 50 listing lines: use the large profile.
partition=2000
coalesce=200
executor=8
memory=10
core=4
flag=1
fi
# S3 prefix holding the pre-processed input for the target hour.
INPUT="s3://mob-emr-test/dataplatform/rtdmp_pre/${date_path}"
# NOTE(review): OUTPUT is assigned twice. As written, the second value (without
# the trailing "/0") overrides the first. These look like the before/after
# sides of a diff -- confirm which one is intended.
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}/0"
OUTPUT="s3://mob-emr-test/dataplatform/rtdmp_deal/${date_path}"
# Submit the RTDmpMainDeal Spark job to YARN in cluster mode with fixed
# resources (50 executors, 4 cores and 4g each, 4g driver), speculation enabled
# and G1GC on the executors. It reads ${INPUT}, writes ${OUTPUT}, and is passed
# ${date_time} as both -time and -data_utime.
# The whole command is one backslash-continued line, so no comment may be
# placed between its continuation lines.
# NOTE(review): nothing checks this command's exit status; the `$?` test
# further down only sees the later RTDmpMain submit. This block may be the
# removed side of a diff -- confirm before relying on it.
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMainDeal \
--name "RTDmpMainDeal.${date_time}" \
--conf spark.sql.shuffle.partitions=10000 \
--conf spark.default.parallelism=500 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.9 \
--conf spark.speculation.multiplier=1.3 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 4 --num-executors 50 \
../${JAR} -time "${date_time}" -data_utime "${date_time}" -input ${INPUT} -output ${OUTPUT} -coalesce 200 -partition 10000
# Path fragment for the hour two hours before ${ScheduleTime}, i.e. the hour
# immediately preceding the one processed here.
before_date_path=$(date +%Y/%m/%d/%H -d "-2 hour $today")
BEFORE_OUTPUT="s3://mob-emr-test/dataplatform/rtdmp/${before_date_path}"
# check_await is not defined in the visible lines (presumably provided by the
# sourced dmp_env.sh -- confirm). It is handed the _SUCCESS marker path of the
# previous hour's output.
check_await "${BEFORE_OUTPUT}/_SUCCESS"
# Submit the RTDmpMain Spark job to YARN in cluster mode. Parallelism, executor
# count, memory, and cores come from the profile variables chosen from the
# input listing size; driver memory is fixed at 6g.
# NOTE(review): ${date_time}, ${INPUT} and ${OUTPUT} are passed unquoted, so a
# value containing whitespace would be split into several arguments.
spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMain \
--name "RTDmpMain.${date_time}" \
--conf spark.sql.shuffle.partitions=${partition} \
--conf spark.default.parallelism=${partition} \
--conf spark.kryoserializer.buffer.max=512m \
--conf spark.kryoserializer.buffer=64m \
--master yarn --deploy-mode cluster \
--executor-memory ${memory}g --driver-memory 6g --executor-cores ${core} --num-executors ${executor} \
.././DMP.jar \
-flag ${flag} -time ${date_time} -input ${INPUT} -output ${OUTPUT} -coalesce ${coalesce}
# Propagate a failed submit to the caller: exit 255 on any non-zero status.
if [[ $? -ne 0 ]]; then
exit 255
fi
\ No newline at end of file
exit 255
fi
......@@ -31,7 +31,6 @@ import scala.collection.{immutable, mutable}
object Logic {
def getResultFeature(session: CqlSession, iterator: Iterator[Row]): Iterator[AudienceInfo] = {
val sql =
"""
|select audience_data from rtdmp.audience_info where devid = '@devid'
......@@ -39,7 +38,6 @@ object Logic {
val res = new ArrayBuffer[AudienceInfo]()
iterator.foreach(row => {
// val session = connector.openSession()
val devId = row.getAs[String](0)
val audience_data = row.getAs[String](1)
val query_sql = sql.replace("@devid", devId)
......@@ -49,7 +47,6 @@ object Logic {
} else {
new JSONObject().toJSONString
}
// session.close()
res.add(AudienceInfo(devId, audience_data, old_audience_data))
})
res.iterator()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment