all_count_country_interest.sh 4.97 KB
#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @file    :dsp_count_country_interest.sh
# @author  :fengliang
# @revision:${year}-09-08
# @desc    :DMP 分国家分兴趣统计
# # # # # # # # # # # # # # # # # # # # # #

source ../prd_env.sh

echo "ScheduleTime=$ScheduleTime"
LOG_TIME=$(date +%Y%m%d -d "-2 days $ScheduleTime")
dt=$(date +"%Y-%m-%d" -d "-2 days $ScheduleTime")
tag_time=$(date +%Y%m%d -d "-1 days $ScheduleTime")
ga_time=$(get_recently_date $GA_DAILY_PATH $LOG_TIME "")

today=$(ts=Asia/Shanghai date +%Y%m%d -d"1 day ago")
device_time=$(get_recently_date $DMP_TOTAL_PATH $today "")

date_path=$(date +"%Y/%m/%d" -d "-2 days $ScheduleTime")
device_path=$(date +"%Y/%m/%d" -d "$device_time")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

check_await $INSTALL_DAILY_3S/$date_path/_SUCCESS
#check_await $TMP_ADSERVER_PACKAGE_PATH/$date_path/_SUCCESS
check_await $DIM_ADN_CAMPAIGN/$date_path/_SUCCESS
check_await $ETL_ADN_SDK_REQUEST_DAILY/$date_path/_SUCCESS
check_await $ETL_DSP_REQ_DAILY/$date_path/_SUCCESS
check_await $MP_REQUEST_DAILY_PATH/$date_path/_SUCCESS
check_await $DMP_TOTAL_PATH/$device_path/_SUCCESS

OUTPUT_PATH="$REQUEST_PATH/all_count_country_interest/$date_path"

hql="
use dwh;
drop table tmp_device_country_$LOG_TIME;
drop table tmp_country_tag_${LOG_TIME};
drop table tmp_adn_install_${LOG_TIME};
drop table tmp_adn_request_${LOG_TIME};
drop table tmp_ga_install_${LOG_TIME};

set mapreduce.task.io.sort.mb=512;

create table tmp_adn_install_${LOG_TIME} as
select a.device_id, a.device_type, b.country
  from (
    select t.device_id, t.device_type
    from etl_adn_install_daily t
    where t.date='${LOG_TIME}'
  ) a
  left outer join (
    select device_id, t.id_type as device_type, t.country_code as country
    from ods_adn_device_total t
    where t.year='${year}' and t.month='${month}' and t.day='${day}'
  ) b on a.device_id=b.device_id and a.device_type=b.device_type;

create table tmp_adn_request_${LOG_TIME} as
 select a.device_id, a.device_type, b.country
  from (
    select t.device_id, t.device_type
    from etl_adn_sdk_request_daily t
    where t.date='${LOG_TIME}'
  ) a
  left outer join (
    select device_id, t.id_type as device_type, t.country_code as country
    from ods_adn_device_total t
    where t.year='${year}' and t.month='${month}' and t.day='${day}'
  ) b on a.device_id=b.device_id and a.device_type=b.device_type;

create table tmp_ga_install_${LOG_TIME} as
 select a.device_id, a.device_type, b.country
  from (
    select t.device_id, t.device_type
    from ods_ga_install_daily t
    where t.date='$ga_time'
  ) a
  join (
    select t.device_id, t.device_type, t.country
    from ods_ga_device_total t
    where t.year='${year}' and t.month='$month' and t.day='${day}'
  ) b on a.device_id=b.device_id and a.device_type=b.device_type;

create table tmp_device_country_${LOG_TIME}(
  device_id string,
  device_type string,
  country string
)
stored as rcfile;

insert overwrite table tmp_device_country_${LOG_TIME}
select t.device_id, t.device_type,
  case when t.country rlike '[A-Z0-9]{2}' then t.country
  else 'unknow' end as country
from (
  select t.device_id, t.device_type, t.country
  from etl_3s_install_daily t
  where t.date='${LOG_TIME}'

  union all
  
  select device_id, device_type, country
  from tmp_adn_install_${LOG_TIME}
  
  union all
  
  select device_id, device_type, country
  from tmp_adn_request_${LOG_TIME}
  
  union all
  select t.device_id, t.device_type, t.country_code as country
  from etl_dsp_request_daily t
  where t.date='${dt}'
  
  union all
  
  select device_id, device_type, country
  from tmp_ga_install_${LOG_TIME}
) t
group by t.device_id, t.device_type,
  case when t.country rlike '[A-Z0-9]{2}' then t.country
  else 'unknow' end ;


add jar ../${JAR};
create temporary function expansionTags as 'mobvista.prd.datasource.udf.ExpansionTags';

create table tmp_country_tag_${LOG_TIME} (
  device_id string,
  device_type string,
  country string,
  tags string
)
STORED as rcfile;

insert overwrite table tmp_country_tag_${LOG_TIME}
select a.device_id,a.device_type, a.country, b.tags
from tmp_device_country_${LOG_TIME} a
join (
  select a.device_id, a.device_type, a.tags
  from dm_interest_tag a
  where a.year='${tag_time:0:4}' and a.month='${tag_time:4:2}' and a.day='${tag_time:6:2}' and a.business='all'
) b on a.device_id=b.device_id and a.device_type=b.device_type;


insert overwrite directory '$OUTPUT_PATH'
select t.country, t.firstTag, t.secondTag, count(1) as cnt
from (
select a.device_id, a.device_type, a.country, b.firstTag, b.secondTag
from tmp_country_tag_${LOG_TIME} a
LATERAL VIEW expansionTags(a.tags) b as firstTag, secondTag
group by a.device_id, a.device_type, a.country, b.firstTag, b.secondTag
) t
group by t.country, t.firstTag, t.secondTag;

drop table tmp_device_country_${LOG_TIME};
drop table tmp_country_tag_${LOG_TIME};
drop table tmp_adn_install_${LOG_TIME};
drop table tmp_adn_request_${LOG_TIME};
drop table tmp_ga_install_${LOG_TIME};
"
hive_cmd "$hql"
if [ $? -ne 0 ];then
  exit 255
fi