#!/bin/bash
# Backflow backup: export the Cassandra mapping.mapping_server table for one
# region to S3 via a Spark job, covering yesterday's writeTime window.
#
# Usage: <script> <region> <hour_offset>
#   region      - Cassandra/AWS region to read from
#   hour_offset - hours past ScheduleTime to wait before starting
#
# Requires ../../dmp_env.sh to define ScheduleTime and BACKFLOW_OUTPUT.
# Exits 255 if the Spark job fails.

source ../../dmp_env.sh

# region name
region=$1
# hours to wait after ScheduleTime before starting
hour=$2

# Absolute wall-clock time at which the job is allowed to start.
schedule_time=$(date +"%Y-%m-%d %H:%M:%S" -d "${hour} hour $ScheduleTime")
final_schedule_time=$(date +%s -d "$schedule_time")

# Poll every 5 minutes until the scheduled start time has passed.
currtime=$(date '+%s')
while (( currtime <= final_schedule_time )); do
  sleep 300
  currtime=$(date '+%s')
done

# Backup window start (yesterday, relative to ScheduleTime).
backflow_start_time=$(date +%Y%m%d -d "-1 day $ScheduleTime")
# Cassandra writeTime is in microseconds (16 digits): epoch seconds * 1,000,000.
writetime_start=$(( $(date +%s -d "$backflow_start_time") * 1000000 ))
# Backup window end (today, relative to ScheduleTime).
backflow_end_time=$(date +%Y%m%d -d "$ScheduleTime")
writetime_end=$(( $(date +%s -d "$backflow_end_time") * 1000000 ))

# package name
system="mapping"
# keyspace name
keyspace="mapping"
# table name
table="mapping_server"
# a value column must be specified; used to compute writeTime
value_column="devid_value"

date_path=$(date +"%Y/%m/%d" -d "-1 day ${ScheduleTime}")
# S3 destination path for this backup.
output="${BACKFLOW_OUTPUT}/${keyspace}/${table}/${date_path}/${region}/"

spark-submit --class mobvista.dmp.datasource.backflow.BackFlow \
  --name "BackFlow.${keyspace}.${table}.${region}" \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.kryoserializer.buffer.max=512m \
  --conf spark.kryoserializer.buffer=64m \
  --master yarn --deploy-mode cluster \
  --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 4 \
  ../.././DMP.jar \
  -keyspace "${keyspace}" -table "${table}" -region "${region}" -output "${output}" -system "${system}" \
  -writetime_start "${writetime_start}" -writetime_end "${writetime_end}" -value_column "${value_column}"

if [[ $? -ne 0 ]]; then
  exit 255
fi