joypc_sdk_daily.sh 1.52 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @author : 刘凯
# @date : 2018-02-18
# @desc : Parse the fluentd joypc_sdk daily data into the etl daily table
# # # # # # # # # # # # # # # # # # # # # #

# Load the shared environment file. The POSIX `.` builtin is used instead of
# `source` because this script declares #!/bin/sh, and `source` is not
# available in every /bin/sh implementation.
. ../dmp_env.sh

# Resolve the target day (one day before $ScheduleTime) with a single `date`
# call, then derive every other representation from that one value so they can
# never disagree with each other.
# NOTE(review): $ScheduleTime is not set in this script; presumably it comes
# from the scheduler or dmp_env.sh - confirm. When it is empty, GNU date
# resolves the expression relative to the current time.
# Abort if `date` rejects the expression: continuing with empty date values
# would make later steps operate on paths that lack the day component.
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d") || exit 255

year=${date_path%%/*}    # text before the first '/'
day=${date_path##*/}     # text after the last '/'
month=${date_path#*/}    # drop the leading "YYYY/" ...
month=${month%/*}        # ... then the trailing "/DD"

LOG_TIME="${year}${month}${day}"


WangJinfeng committed
19
# Input location for the target day. The trailing */* is passed through to the
# Spark job as-is (it is quoted below so the shell never expands it).
INPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/stage_joypac_upload_daily/$date_path/*/*"
## OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_joypc_sdk_daily/$date_path"
OUTPUT_PATH="${JOYPAC_DAILY_LOG_PATH}/$date_path"

# Refuse to continue when a required value is empty. With an empty
# JOYPAC_DAILY_LOG_PATH or date_path, the recursive delete below would target
# a parent (or root-level) directory instead of a single day's output.
# NOTE(review): JOYPAC_DAILY_LOG_PATH and JAR are not set in this script;
# presumably they are provided by dmp_env.sh - confirm.
if [ -z "$JOYPAC_DAILY_LOG_PATH" ] || [ -z "$date_path" ] ||
  [ -z "$LOG_TIME" ] || [ -z "$JAR" ]; then
  echo "joypc_sdk_daily: JOYPAC_DAILY_LOG_PATH, date_path, LOG_TIME and JAR must all be non-empty" >&2
  exit 255
fi

# Remove any output left by a previous run for the same day. The exit status
# is intentionally not checked, matching the original behavior.
hadoop fs -rm -r "$OUTPUT_PATH/"

# Submit the Spark job; any non-zero exit status aborts the script with 255.
# Every expansion is quoted so that the glob characters in INPUT_PATH and any
# whitespace in the values reach spark-submit unchanged.
if ! spark-submit --class mobvista.dmp.datasource.joypac.JoypcSdkDaily \
  --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \
  --conf spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive=true \
  --conf spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
  --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
  --master yarn --deploy-mode cluster --name joypc_sdk_daily \
  --conf "spark.app.loadTime=${LOG_TIME}" \
  --conf "spark.app.input_path=${INPUT_PATH}" \
  --conf "spark.app.output_path=${OUTPUT_PATH}" \
  --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \
  "../${JAR}"; then
  exit 255
fi

# mount_partition is not defined in this script (presumably it comes from
# dmp_env.sh - confirm). It is called with the table name, the partition spec
# and the output location; its exit status becomes the script's exit status.
mount_partition "etl_joypc_sdk_daily" "day='${LOG_TIME}'" "$OUTPUT_PATH"