#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
## Re-run data from the raw DSP logs when a failed job is retried
# # # # # # # # # # # # # # # # # # # # # #
source ../dmp_env.sh
ScheduleTime=${ScheduleTime:-$1}
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y-%m-%d")
dt=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
date_path=$(date -d "$ScheduleTime 1 days ago" "+%Y/%m/%d")
old_path=$(date -d "$ScheduleTime 2 days ago" "+%Y/%m/%d")
dt_yesterday=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
rm_dt=$(date -d "$ScheduleTime 180 days ago" "+%Y%m%d")
rm_dt_path=$(date -d "$ScheduleTime 180 days ago" "+%Y/%m/%d")
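# ScheduleTime comes from the scheduler environment, falling back to the first
# CLI argument. Worked example of the derived dates (illustrative value,
# assuming ScheduleTime="2024-06-10"):
#   LOG_TIME=2024-06-09   dt=20240609    date_path=2024/06/09
#   old_path=2024/06/08   dt_yesterday=20240608
#   rm_dt=20231213        rm_dt_path=2023/12/13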
: '
# Derive the date of the most recent Sunday from $date_path (disabled, kept for reference)
week=$(date -d "$date_path" +%w)
echo "week=$week"
if [ "$week" -eq "0" ]; then
week=7 # Sunday is reported as 0; treat it as 7
fi
if [ "$week" -eq "1" ]; then
week=8 # On Monday use 8, i.e. take the Sunday before last
fi
last_sunday=$(date +%Y%m%d -d "-$week day $date_path")
'
INPUT_DSP_PATH="$ADN_DSP_PATH/$date_path"
TMP_OUTPUT_PATH="${TMP_DSP_REQUEST_DAILY_PATH}/$date_path"
OUTPUT_PATH="$ETL_DSP_REQ_DAILY/$date_path"
MDS_REQUEST_OUTPUT_PATH="${MDS_DSP_REQ_DAILY}/$date_path"
RM_MDS_REQUEST_OUTPUT_PATH="${MDS_DSP_REQ_DAILY}/$rm_dt_path"
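# Input, temp, and output locations for the day; the RM_* path points at the
# partition from 180 days back, presumably targeted by a retention cleanup step.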
## Match each bundleId to its packageName:
## matchBundlePackage "$date_path" "$old_path" "2" "10" "$TMP_OUTPUT_PATH" "$OUTPUT_PATH" "dsp" "../${JAR}"
matchBundlePackage "$date_path" "$old_path" "2" "10" "$TMP_OUTPUT_PATH" "${DSP_BUNDLE_OUTPUT_PATH}" "dsp" "../${JAR}"
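# matchBundlePackage is assumed to be a helper defined in dmp_env.sh (sourced
# above). Read from this call site, its positional arguments look like: current
# and previous date paths, two numeric tuning parameters, the temp and final
# output paths, a business tag ("dsp"), and the job jar. This reading is an
# assumption from usage, not from the helper definition.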
### Wait for the upstream device-id MD5 match data to land before running:
check_await "${DEVICE_ID_MD5_MATCH_PATH}/${old_path}/_SUCCESS"
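# check_await is presumably a dmp_env.sh helper that blocks until the given HDFS
# _SUCCESS marker appears. A minimal sketch of such a helper (an assumption, not
# the actual implementation):
#
#   check_await() {
#       until hadoop fs -test -e "$1"; do
#           echo "waiting for $1 ..."
#           sleep 60
#       done
#   }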
hadoop fs -rm -r "${OUTPUT_PATH}" # clear any output left by a previous attempt so the retry starts clean
### One Spark job to process the imei device type:
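# Notes on the configuration below:
#   - spark.sql.shuffle.partitions=1000 spreads SQL shuffles over 1000 tasks,
#     while plain RDD stages default to 200 (spark.default.parallelism).
#   - spark.sql.autoBroadcastJoinThreshold=31457280 raises the broadcast-join
#     cutoff to 30 MB (Spark defaults to 10 MB).
#   - ${HIVE_SITE_PATH} (presumably hive-site.xml) is shipped via --files so the
#     job can reach the Hive metastore; hive-hcatalog-core is presumably needed
#     for the HCatalog/JSON SerDe.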
spark-submit --class mobvista.dmp.datasource.dsp.DspReqImeiDealDaily \
--conf spark.yarn.executor.memoryOverhead=2048 \
--conf spark.network.timeout=720s \
--conf spark.sql.shuffle.partitions=1000 \
--conf spark.default.parallelism=200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \
--master yarn --deploy-mode cluster --name DspReqImeiDealDaily --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 20 \
../${JAR} -output "$OUTPUT_PATH" -coalesce 200 \
-devmd5day ${dt_yesterday}
if [[ $? -ne 0 ]]; then
    exit 255  # non-zero exit so the scheduler marks this run failed and retries it
fi
mount_partition "etl_dsp_request_daily" "\`date\`='$LOG_TIME'" "$OUTPUT_PATH"
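# mount_partition is assumed to be a dmp_env.sh helper that registers the output
# directory as a Hive partition; conceptually (an assumption, not the real
# implementation) it would run something like:
#
#   hive -e "ALTER TABLE etl_dsp_request_daily ADD IF NOT EXISTS
#            PARTITION (\`date\`='$LOG_TIME') LOCATION '$OUTPUT_PATH'"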