1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
# Usage: <script> <region> <hour>
#   region - region name, used for the job name, output path and partition.
#   hour   - number of hours after ScheduleTime to wait before starting.
# Reads ScheduleTime, which is not set in this script
# (presumably provided by the scheduler or by dmp_env.sh - confirm).
source ../../dmp_env.sh

# region name
region=$1
# wait time, in hours relative to ScheduleTime
hour=$2

# Resolve the earliest start time. The status of this call must be checked:
# if it fails, schedule_time is empty and `date -d ""` below would resolve to
# today's midnight, which silently skips the wait.
if ! schedule_time=$(date +"%Y-%m-%d %H:%M:%S" -d "${hour} hour ${ScheduleTime}"); then
  echo "cannot compute start time from hour='${hour}' ScheduleTime='${ScheduleTime}'" >&2
  exit 255
fi
if ! final_schedule_time=$(date +%s -d "${schedule_time}"); then
  echo "cannot convert '${schedule_time}' to epoch seconds" >&2
  exit 255
fi

# Poll every 300 seconds until the current time is strictly past the deadline.
currtime=$(date '+%s')
until ((currtime > final_schedule_time)); do
  sleep 300
  currtime=$(date '+%s')
done
# Back-flow backup window and job parameters.
# Reads ScheduleTime, BACKFLOW_OUTPUT and region, none of which are set here.
# Any failing `date` call aborts with 255 so the job is never launched with
# empty write-time bounds.

# Back-flow backup start day (YYYYMMDD): the day before ScheduleTime.
backflow_start_time=$(date +%Y%m%d -d "-1 day ${ScheduleTime}") || exit 255
# Start timestamp: epoch seconds * 1000000, because the Cassandra writeTime
# is a 16-digit value.
writetime_start=$(date +%s -d "${backflow_start_time}") || exit 255
writetime_start=$((writetime_start * 1000000))

# Back-flow backup end day (YYYYMMDD): the day of ScheduleTime.
backflow_end_time=$(date +%Y%m%d -d "${ScheduleTime}") || exit 255
# End timestamp: epoch seconds * 1000000.
writetime_end=$(date +%s -d "${backflow_end_time}") || exit 255
writetime_end=$((writetime_end * 1000000))

# package name
system="mapping"
# keyspace name
keyspace="mapping"
# table name
table="mapping_server"
# A value column must be specified; it is used to compute writeTime.
value_column="devid_value"

# Partition day: same value as the start day. Everything below is derived
# from this one value so the output path and partition fields always agree.
date=${backflow_start_time}
year=${date:0:4}
month=${date:4:2}
day=${date:6:2}
date_path="${year}/${month}/${day}"

# S3 backup location
output="${BACKFLOW_OUTPUT}/${keyspace}/${table}/${date_path}/${region}/"
# Submit the BackFlow Spark job (class mobvista.dmp.datasource.backflow.BackFlow
# from DMP.jar) to YARN in cluster mode. All expansions are quoted so that a
# value containing whitespace or glob characters is passed as a single
# argument and cannot shift the flags that follow it.
if ! spark-submit --class mobvista.dmp.datasource.backflow.BackFlow \
  --name "BackFlow.${keyspace}.${table}.${region}" \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --master yarn --deploy-mode cluster \
  --executor-memory 4g --driver-memory 4g --executor-cores 4 --num-executors 6 \
  ../.././DMP.jar \
  -keyspace "${keyspace}" -table "${table}" -region "${region}" \
  -output "${output}" -system "${system}" \
  -writetime_start "${writetime_start}" -writetime_end "${writetime_end}" \
  -value_column "${value_column}"; then
  # Job failed: stop before touching the partition.
  exit 255
fi

# common_mount_partition is not defined in this file (presumably provided by
# dmp_env.sh - confirm). It is called with database "default", table
# "mapping_dump_table_shulun", the partition spec and the output location.
if ! common_mount_partition "default" "mapping_dump_table_shulun" "year='${year}', month='${month}', day='${day}', region='${region}'" "${output}"; then
  exit 255
fi