#!/bin/bash

######################################################################
# author: fengliang
# date:  20170518
# desc:  将adn、dsp和ga每天的活跃用户添加到ods_dmp_device_total表中
######################################################################

source ../prd_env.sh

# Derive the run dates (yyyymmdd) from ScheduleTime, which is expected to be
# set in the environment:
#   date     - one day before ScheduleTime
#   old_date - two days before ScheduleTime
echo "ScheduleTime=$ScheduleTime"
if ! date=$(date +%Y%m%d -d "-1 days $ScheduleTime") \
  || ! old_date=$(date +%Y%m%d -d "-2 days $ScheduleTime"); then
  # Every path and partition name below is built from these two values, so
  # continuing with an empty date would only produce malformed targets.
  echo "cannot derive run dates from ScheduleTime='$ScheduleTime'" >&2
  exit 255
fi

# Get the most recent GA date.
# get_recently_date is defined outside this file; it is called with the GA
# daily root, the run date and the '_SUCCESS' marker name.
ga_date=$(get_recently_date "$GA_DAILY_PATH/" "$date" '_SUCCESS')

# ga_date is sliced as yyyymmdd and used verbatim as a partition value further
# down, so refuse to continue with anything that is not eight digits.
if [[ ! $ga_date =~ ^[0-9]{8}$ ]]; then
  echo "get_recently_date returned an unusable GA date: '$ga_date'" >&2
  exit 255
fi

# Break the run date (yyyymmdd) into its parts; the HQL further down filters
# on year / month / day individually.
year="${date:0:4}"
month="${date:4:2}"
day="${date:6:2}"

# yyyy/mm/dd directory suffixes for the run date and for the previous day.
printf -v date_path '%s/%s/%s' "$year" "$month" "$day"
printf -v old_path '%s/%s/%s' "${old_date:0:4}" "${old_date:4:2}" "${old_date:6:2}"

# Input directories: adn and dsp use the run date, ga uses its own latest date.
ADN_DEVICE_DAILY_PATH="$ADN_REQUEST_PATH/$date_path"
DSP_DEVICE_DAILY_PATH="$ETL_DSP_REQ_DAILY/$date_path"
printf -v GA_DEVICE_DAILY_PATH '%s/%s/%s/%s' \
  "$GA_DAILY_PATH" "${ga_date:0:4}" "${ga_date:4:2}" "${ga_date:6:2}"

# Device-total directories: previous day's (read) and this run's (written).
OLD_DMP_DEVICE_TOTAL_PATH="$DMP_TOTAL_PATH/$old_path"
DMP_DEVICE_TOTAL_PATH="$DMP_TOTAL_PATH/$date_path"

# check_await is defined outside this file; judging by its name it waits for
# the given path to appear - confirm against its definition.
# Paths are quoted so each one is always passed as a single argument.

# Check that the adn daily data exists.
check_await "$ADN_DEVICE_DAILY_PATH/frankfurt/23/_SUCCESS"

# Check that the dsp daily data exists.
check_await "$DSP_DEVICE_DAILY_PATH/_SUCCESS"

# Check that the previous day's total exists.
check_await "$OLD_DMP_DEVICE_TOTAL_PATH/_SUCCESS"

# Create this run's output directory.
# -p also creates a missing yyyy/mm parent (first run of a month) and does not
# complain when the directory already exists (reruns).
hadoop fs -mkdir -p "$DMP_DEVICE_TOTAL_PATH"


# HQL for the daily roll-up (comments are kept out of the string itself):
#   1. Register the ga / dsp / device-total partitions for this run.
#   2. Build tmp_total_device_<date>: the distinct (device_id, device_type,
#      country) tuples seen in the adn, ga and dsp daily inputs.
#   3. Overwrite ods_dmp_device_total dt=<date> with today's devices merged
#      with the dt=<old_date> total, keeping the row with the greatest dt per
#      (device_id, device_type). Only UUID-shaped device ids are kept and
#      country 'UK' is rewritten to 'GB'.
#   4. Drop the temp table.
#
# The adn device_type literal used to be 'gaid ' (trailing space), which made
# those rows differ from 'gaid' in the grouping and partitioning keys.
# NOTE(review): rows already stored with the padded value are not rewritten
# here - confirm whether a one-off cleanup is needed.
# NOTE(review): the dsp partition is registered as date='<yyyymmdd>' but the
# dsp select filters on date='<yyyy>-<mm>-<dd>' - confirm which format the
# table really uses.
# NOTE(review): for history rows the 4th output column (aliased last_req_day)
# is taken from the partition column dt - confirm this is intended.
hql="
use dwh;

alter table ods_ga_device_daily add if not exists partition (\`date\`='${ga_date}') location '${GA_DEVICE_DAILY_PATH}';
alter table etl_dsp_request_daily add if not exists partition (\`date\`='${date}') location '${DSP_DEVICE_DAILY_PATH}';
alter table ods_dmp_device_total add if not exists partition (dt='${date}') location '${DMP_DEVICE_TOTAL_PATH}';

drop table if exists tmp_total_device_${date};
create table tmp_total_device_${date} as
select t.device_id, t.device_type, t.country, '${date}' as dt
from (
  select
    case when t.gaid not in ('0', 'NULL', '') then gaid
         when t.idfa not in ('0', 'NULL', '') then idfa
         else null end as device_id,
    case when t.gaid not in ('0', 'NULL', '') then 'gaid'
         when t.idfa not in ('0', 'NULL', '') then 'idfa'
         else 'unknow' end as device_type,
    t.country_code as country
  from ods_adn_trackingnew_request t
  where t.yyyy='${year}' and t.mm='${month}' and t.dd='${day}'

  union all

  select t.device_id, t.device_type, t.country
  from ods_ga_device_daily t
  where t.date='${ga_date:0:4}${ga_date:4:2}${ga_date:6:2}'

  union all

  select t.device_id, t.device_type,t.country_code as country
  from etl_dsp_request_daily t
  where t.date='${year}-${month}-${day}'

) t
group by t.device_id, t.device_type, t.country;


set hive.exec.reducers.max=420;
set mapreduce.task.io.sort.mb=512;
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.type=BLOCK;
set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
insert overwrite table ods_dmp_device_total partition (dt='$date')
select t.device_id, t.device_type, case when t.country='UK' then 'GB' else t.country end as country, t.dt as last_req_day
from (
  select t.device_id, t.device_type, t.country, t.dt, row_number() over(PARTITION BY t.device_id, t.device_type  order by t.dt desc) rk
  from (
    select t.device_id, t.device_type, t.country, '${date}' as dt
    from tmp_total_device_${date} t
    where t.device_id rlike '^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$'

    union all

    select t.device_id, t.device_type, t.country, t.dt
    from ods_dmp_device_total t
    where t.dt='$old_date' and t.device_id rlike '^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$'
  ) t
) t
where t.rk = 1;

drop table if exists tmp_total_device_${date};
"

# Execute the HQL with the hive command held in $offline_hive (left unquoted
# so it can expand to a command plus options); any failure ends the job
# with status 255.
$offline_hive -e "$hql" || exit 255

# Write the _SUCCESS marker into this run's output directory; if it cannot be
# created the job ends with status 255.
if ! hadoop fs -touchz "$DMP_DEVICE_TOTAL_PATH/_SUCCESS"; then
  echo "failed to create $DMP_DEVICE_TOTAL_PATH/_SUCCESS" >&2
  exit 255
fi

# Keep only three days of data: the partition dated three days before the run
# date is dropped.
if ! expireDate=$(date -d "$date 3 days ago" +"%Y%m%d"); then
  echo "cannot compute the expiry date from date='$date'" >&2
  exit 255
fi

# 'if exists' keeps the statement from failing when that partition was never
# created or has already been dropped (early runs, reruns).
hql="
use dwh;alter table ods_dmp_device_total drop if exists partition (dt='${expireDate}');
"
# Run the partition-expiry statement; a hive failure ends the job with 255.
$offline_hive -e "$hql" || exit 255

# Delete the expired day's directory.
# Guard the recursive delete first: with an empty DMP_TOTAL_PATH or a
# malformed expireDate the target would collapse to something like "///".
if [[ -z $DMP_TOTAL_PATH || ! $expireDate =~ ^[0-9]{8}$ ]]; then
  echo "refusing to delete: DMP_TOTAL_PATH='$DMP_TOTAL_PATH' expireDate='$expireDate'" >&2
  exit 255
fi

expirePath="${DMP_TOTAL_PATH}/${expireDate:0:4}/${expireDate:4:2}/${expireDate:6:2}/"
# Kept as the last command so the script's exit status still reflects it.
hadoop fs -rm -r "$expirePath"
