#!/bin/bash

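######################################################################
# author: fengliang
# date:   20170518
# desc:   Computes the sample counts of the interest tag data, overall
#         and per country, and loads them into the report table.
#         NOTE(review): the original description spoke of sample counts
#         and algorithm-recommended counts per age bracket; this script
#         only produces interest-tag sample counts - confirm the intent.
######################################################################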

# Shared settings. The names used further down (ScheduleTime, JAR,
# DM_INTEREST_PATH, REPORT_INTEREST_TAG_RESULT_PATH,
# REPORT_INTEREST_TAG_CNT_RESULT_PATH, REPORT_COUNTRIES, offline_hive,
# MYSQL_ETL) are not defined in this script - presumably they come from
# this file or from the scheduler's environment.
# NOTE(review): the path is relative, so the script has to be started from
# its own directory.
source ../prd_env.sh

echo "ScheduleTime=$ScheduleTime"

# The day being processed is the day before the schedule time (yyyymmdd).
date=$(date +%Y%m%d -d "-1 days $ScheduleTime")

# An unparsable ScheduleTime leaves $date empty. Every path below would then
# collapse to its root (".../////") and the later recursive HDFS delete of
# OUTPUT_PATH would hit far more than one day of data, so stop right here.
if [[ ! "$date" =~ ^[0-9]{8}$ ]]; then
    echo "Cannot derive the processing date from ScheduleTime='$ScheduleTime'" >&2
    exit 255
fi

year=${date:0:4}
month=${date:4:2}
day=${date:6:2}
date_path="${year}/${month}/${day}"

# HDFS locations: job input, MapReduce output, Hive per-country output.
INPUT_PATH="$DM_INTEREST_PATH/$date_path/all/"
OUTPUT_PATH="$REPORT_INTEREST_TAG_RESULT_PATH/$date_path/"
COUNTRY_OUTPUT_PATH="${REPORT_INTEREST_TAG_CNT_RESULT_PATH}/$date_path"

echo "INPUT_PATH = $INPUT_PATH"
echo "OUTPUT_PATH = $OUTPUT_PATH"

# Wait until the upstream job has published its _SUCCESS marker in the
# input directory. The marker is probed up to 12 times with a 60 second
# pause between probes; if it is still missing the script gives up with
# exit code 255.
max_retries=11
retry_interval=60
count=0
until hadoop fs -test -e "$INPUT_PATH/_SUCCESS"; do
    if (( count >= max_retries )); then
        echo "Data delay..."
        exit 255
    fi

    # Only announce a retry when one is really going to happen.
    echo "data delay, will retry after ${retry_interval}s"
    count=$(( count + 1 ))
    sleep "$retry_interval"
done


# OUTPUT_PATH is "<root>/<yyyy>/<mm>/<dd>/". With an empty root the recursive
# delete below would target an absolute top-level HDFS directory, so refuse
# to continue in that case.
if [[ -z "$REPORT_INTEREST_TAG_RESULT_PATH" ]]; then
    echo "REPORT_INTEREST_TAG_RESULT_PATH is empty, refusing to remove $OUTPUT_PATH" >&2
    exit 255
fi

# Drop the output of any earlier run for this day. The exit status is not
# checked, so a directory that does not exist yet does not stop the script.
echo "Will rm hdfs dir : $OUTPUT_PATH"
hadoop fs -rm -r "$OUTPUT_PATH"

# Run the MapReduce job that writes the overall interest tag statistics to
# OUTPUT_PATH; any failure aborts the script with exit code 255.
# NOTE(review): the -D generic option follows the application argument
# -input. Hadoop documents generic options as coming before application
# arguments, so this setting may never reach the job configuration -
# confirm how InterestTagJob parses its arguments before moving it.
if ! hadoop jar "../${JAR}" mobvista.prd.datasource.tag.mapreduce.InterestTagJob -input "$INPUT_PATH" \
    -Dmapreduce.fileoutputcommitter.algorithm.version=2 \
    -output "$OUTPUT_PATH" -reduceNum 2; then
    exit 255
fi


# Hive script that counts rows per (country, interest) and writes the result
# to COUNTRY_OUTPUT_PATH on HDFS:
#   - subquery a: the day's partition of dm_interest_tag with business='all';
#     the interest value is derived from the tags column by the
#     getInterestInfo UDF, which is loaded from ../${JAR}.
#   - subquery b: the dt='${date}' rows of ods_dmp_device_total whose country
#     is in REPORT_COUNTRIES. The variable is spliced verbatim into the
#     IN (...) list, so it must already be a valid SQL value list.
#   - left outer join on device_id and device_type; rows without a match, or
#     with an empty country, are counted under 'OTHER'.
hql="
use dwh;
add jar ../${JAR};
create temporary function getInterestInfo as 'mobvista.prd.datasource.udf.GetInterestInfo';

insert overwrite directory '$COUNTRY_OUTPUT_PATH'
select t.country, t.interest, count(1) as cnt
from (
  select a.device_id, a.device_type, a.interest,
   case when b.country is null or b.country ='' then 'OTHER'
   else b.country end as country
  from
  (
    select t.device_id, t.device_type, getInterestInfo(t.tags) as interest
    from dm_interest_tag t
    where t.year='$year' and t.month='$month' and t.day='$day'
    and t.business='all'
  ) a
  left outer join
  (
    select *
    from ods_dmp_device_total t
    where t.dt='${date}'
    and t.country in (${REPORT_COUNTRIES})
  ) b on a.device_id=b.device_id and a.device_type=b.device_type
) t
group by t.country, t.interest;
"

# Log the final statement, then run it. $offline_hive is left unquoted on
# purpose so that it can expand to a command plus its options.
# Any Hive failure aborts the script with exit code 255.
echo -e "hql : \n $hql"
$offline_hive -e "$hql"
if [ $? -ne 0 ];then
  exit 255
fi

# Local scratch files (relative to the working directory):
#   data_file      merged MapReduce output (overall statistics)
#   data_cnt_file  merged Hive output (per-country statistics)
#   load_file      tab separated rows for the MySQL bulk load
data_dir="../tmp"
data_file="$data_dir/interest_tag_${date}.data"
data_cnt_file="$data_dir/interest_tag_cnt_${date}.data"
load_file="$data_dir/interest_tag_${date}.load"
mkdir -p "$data_dir" || exit 255

# Pull both HDFS result directories down as single local files.
hadoop fs -getmerge "$OUTPUT_PATH" "$data_file" || exit 255
hadoop fs -getmerge "$COUNTRY_OUTPUT_PATH" "$data_cnt_file" || exit 255

# The Hive export separates fields with \001 (presumably Hive's default
# delimiter for directory output); turn them into tabs for awk.
perl -pi -e 's|\001|\t|g' "$data_cnt_file" || exit 255

# Build the load file. The first write truncates it: a file left behind by
# an earlier failed run for the same day must not be appended to, otherwise
# its rows would be loaded a second time.
awk -F '\t' -v date="${date}" '{print date"\tDMP标签数据\t兴趣\t整体\t"$1"\t"$2"\t-\t样本量\t"$3}' "$data_file" > "$load_file" || exit 255
awk -F '\t' -v date="${date}" '{print date"\tDMP标签数据\t兴趣\t分国家\t"$1"\t"$2"\t"$3"\t样本量\t"$4}' "$data_cnt_file" >> "$load_file" || exit 255

# Remove the rows already stored for this day and metric so that a rerun
# replaces them instead of duplicating them.
# NOTE(review): "dmp_report_reuslt" looks like a misspelling of "result",
# but it is the table name used by both statements - do not "fix" it here
# without renaming the table.
del_sql="
  DELETE FROM dmp_report_reuslt
  where day_key='${date}' and dimension_type1='DMP标签数据'
  and dimension_type2='兴趣' AND video_desc in ('样本量');"

# Delete and bulk load are sent to MySQL as one script.
load_sql="
  $del_sql;load data local infile '${load_file}' into table dmp_report_reuslt;
"

# $MYSQL_ETL is left unquoted on purpose so that it can expand to the client
# command plus its options. A failure aborts with exit code 255 and keeps
# the local files in place.
if ! $MYSQL_ETL "$load_sql"; then
  exit 255
fi

# Remove every scratch file of this run, including the per-country data
# file, which used to be left behind.
rm -f -- "$data_file" "$data_cnt_file" "$load_file"