#!/usr/bin/env bash

# # # # # # # # # # # # # # # # # # # # # #
# @author  :wangjf
# @revision: 2018-12-12 10:47:34
# @desc :  Toutiao DMP data cooperation
# # # # # # # # # # # # # # # # # # # # # #

# Load the shared environment file. The path is relative, so it is resolved
# against the directory the script is launched from.
. ../dmp_env.sh

# Reference day: ScheduleTime when it is set and non-empty, otherwise the
# first positional argument.
today="${ScheduleTime:-$1}"

# Print the day lying $2 days away from ${today}, rendered with the date(1)
# format string given in $1.
_shift_day() {
    date +"$1" -d "$2 day $today"
}

# The day before ${today} in three renderings, plus the day 31 days before it.
date="$(_shift_day '%Y%m%d' -1)"
date_path="$(_shift_day '%Y/%m/%d' -1)"
endDate="$(_shift_day '%Y-%m-%d' -1)"
startDate="$(_shift_day '%Y-%m-%d' -31)"

# Input tag file and the dated output prefix handed to the Spark job.
file='s3://mob-emr-test/wangjf/data/mob_tag.txt'
output_path="s3://mob-emr-test/wangjf/toutiao/${date_path}"

# Fail fast when the values spliced into the command line below are missing:
# an empty expansion would silently shift every following argument (for
# example "--files" would swallow "--jars").
if [ -z "${HIVE_SITE_PATH}" ] || [ -z "${JAR}" ]; then
    echo "HIVE_SITE_PATH and JAR must be set before spark-submit is run" >&2
    exit 255
fi

# Submit the GameDeviceToutiao job to YARN in cluster mode, passing it the run
# date, the start/end dates, the tag file and the output location. Every
# expansion is quoted so each value reaches spark-submit as exactly one
# argument. A non-zero exit status aborts the script with 255.
if ! spark-submit --class mobvista.dmp.datasource.toutiao.GameDeviceToutiao \
    --conf spark.yarn.executor.memoryOverhead=2048 \
    --conf spark.sql.shuffle.partitions=4000 \
    --conf spark.default.parallelism=300 \
    --files "${HIVE_SITE_PATH}" \
    --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
    --master yarn --deploy-mode cluster --executor-memory 12g --driver-memory 6g --executor-cores 4 --num-executors 120 \
    "${JAR}" \
    -date "${date}" -output "${output_path}" -startDate "${startDate}" -endDate "${endDate}" -file "${file}"; then
    exit 255
fi
# Local staging directory for this run's output.
path="/data2/wangjf/data/toutiao/${date}"

# Abort when the staging directory cannot be created or entered; otherwise the
# download and the zip loop below would operate on whatever directory the
# script was started from.
mkdir -p "${path}" || exit 255
cd "${path}" || exit 255

# Copy the job output into the staging directory. The pattern is quoted so it
# is passed to hdfs verbatim instead of being globbed by the local shell.
hdfs dfs -get "${output_path}/*" . || exit 255

# Wrap every downloaded entry in its own "<name>.zip" archive. Globbing
# replaces parsing `ls`, so names containing whitespace stay intact.
for entry in *; do
    # With no match the pattern stays literal; skip it.
    [ -e "${entry}" ] || continue
    echo "${entry}"
    zip "${entry}.zip" "${entry}" || exit 255
done

# Arguments handed to toutiao.py.
advertiser_id="109058238138"
data_source_name="mintegral_device_${date}"
app_id="1620985601930253"
# NOTE(review): this secret is committed to source control; it should be
# rotated and supplied from the environment. TOUTIAO_SECRET, when set and
# non-empty, overrides the built-in value.
secret="${TOUTIAO_SECRET:-5be31e12d0f5c62b62a0e621cd5cf07c8abb0dc7}"

# The script has already changed into ${path}, so a bare "toutiao.py" is
# looked up inside the data directory. Keep that lookup when the file is
# there; otherwise fall back to the directory the script was in before the
# last cd.
uploader="toutiao.py"
if [ ! -f "${uploader}" ] && [ -n "${OLDPWD}" ] && [ -f "${OLDPWD}/toutiao.py" ]; then
    uploader="${OLDPWD}/toutiao.py"
fi

# Run the uploader; any failure ends the script with status 255.
python "${uploader}" "${advertiser_id}" "${data_source_name}" "${app_id}" "${secret}" "${path}" || exit 255