all_count_country_interest.sh 4.97 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @file    :dsp_count_country_interest.sh
# @author  :fengliang
# @revision:${year}-09-08
# @desc    :DMP 分国家分兴趣统计
# # # # # # # # # # # # # # # # # # # # # #

source ../prd_env.sh

echo "ScheduleTime=$ScheduleTime"
LOG_TIME=$(date +%Y%m%d -d "-2 days $ScheduleTime")
dt=$(date +"%Y-%m-%d" -d "-2 days $ScheduleTime")
tag_time=$(date +%Y%m%d -d "-1 days $ScheduleTime")
ga_time=$(get_recently_date $GA_DAILY_PATH $LOG_TIME "")

today=$(ts=Asia/Shanghai date +%Y%m%d -d"1 day ago")
device_time=$(get_recently_date $DMP_TOTAL_PATH $today "")

date_path=$(date +"%Y/%m/%d" -d "-2 days $ScheduleTime")
device_path=$(date +"%Y/%m/%d" -d "$device_time")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

check_await $INSTALL_DAILY_3S/$date_path/_SUCCESS
#check_await $TMP_ADSERVER_PACKAGE_PATH/$date_path/_SUCCESS
check_await $DIM_ADN_CAMPAIGN/$date_path/_SUCCESS
check_await $ETL_ADN_SDK_REQUEST_DAILY/$date_path/_SUCCESS
check_await $ETL_DSP_REQ_DAILY/$date_path/_SUCCESS
check_await $MP_REQUEST_DAILY_PATH/$date_path/_SUCCESS
check_await $DMP_TOTAL_PATH/$device_path/_SUCCESS

OUTPUT_PATH="$REQUEST_PATH/all_count_country_interest/$date_path"

hql="
use dwh;
drop table tmp_device_country_$LOG_TIME;
drop table tmp_country_tag_${LOG_TIME};
drop table tmp_adn_install_${LOG_TIME};
drop table tmp_adn_request_${LOG_TIME};
drop table tmp_ga_install_${LOG_TIME};

set mapreduce.task.io.sort.mb=512;

create table tmp_adn_install_${LOG_TIME} as
select a.device_id, a.device_type, b.country
  from (
    select t.device_id, t.device_type
    from etl_adn_install_daily t
    where t.date='${LOG_TIME}'
  ) a
  left outer join (
    select device_id, t.id_type as device_type, t.country_code as country
    from ods_adn_device_total t
    where t.year='${year}' and t.month='${month}' and t.day='${day}'
  ) b on a.device_id=b.device_id and a.device_type=b.device_type;

create table tmp_adn_request_${LOG_TIME} as
 select a.device_id, a.device_type, b.country
  from (
    select t.device_id, t.device_type
    from etl_adn_sdk_request_daily t
    where t.date='${LOG_TIME}'
  ) a
  left outer join (
    select device_id, t.id_type as device_type, t.country_code as country
    from ods_adn_device_total t
    where t.year='${year}' and t.month='${month}' and t.day='${day}'
  ) b on a.device_id=b.device_id and a.device_type=b.device_type;

create table tmp_ga_install_${LOG_TIME} as
 select a.device_id, a.device_type, b.country
  from (
    select t.device_id, t.device_type
    from ods_ga_install_daily t
    where t.date='$ga_time'
  ) a
  join (
    select t.device_id, t.device_type, t.country
    from ods_ga_device_total t
    where t.year='${year}' and t.month='$month' and t.day='${day}'
  ) b on a.device_id=b.device_id and a.device_type=b.device_type;

create table tmp_device_country_${LOG_TIME}(
  device_id string,
  device_type string,
  country string
)
stored as rcfile;

insert overwrite table tmp_device_country_${LOG_TIME}
select t.device_id, t.device_type,
  case when t.country rlike '[A-Z0-9]{2}' then t.country
  else 'unknow' end as country
from (
  select t.device_id, t.device_type, t.country
  from etl_3s_install_daily t
  where t.date='${LOG_TIME}'

  union all
  
  select device_id, device_type, country
  from tmp_adn_install_${LOG_TIME}
  
  union all
  
  select device_id, device_type, country
  from tmp_adn_request_${LOG_TIME}
  
  union all
  select t.device_id, t.device_type, t.country_code as country
  from etl_dsp_request_daily t
  where t.date='${dt}'
  
  union all
  
  select device_id, device_type, country
  from tmp_ga_install_${LOG_TIME}
) t
group by t.device_id, t.device_type,
  case when t.country rlike '[A-Z0-9]{2}' then t.country
  else 'unknow' end ;


add jar ../${JAR};
create temporary function expansionTags as 'mobvista.prd.datasource.udf.ExpansionTags';

create table tmp_country_tag_${LOG_TIME} (
  device_id string,
  device_type string,
  country string,
  tags string
)
STORED as rcfile;

insert overwrite table tmp_country_tag_${LOG_TIME}
select a.device_id,a.device_type, a.country, b.tags
from tmp_device_country_${LOG_TIME} a
join (
  select a.device_id, a.device_type, a.tags
  from dm_interest_tag a
  where a.year='${tag_time:0:4}' and a.month='${tag_time:4:2}' and a.day='${tag_time:6:2}' and a.business='all'
) b on a.device_id=b.device_id and a.device_type=b.device_type;


insert overwrite directory '$OUTPUT_PATH'
select t.country, t.firstTag, t.secondTag, count(1) as cnt
from (
select a.device_id, a.device_type, a.country, b.firstTag, b.secondTag
from tmp_country_tag_${LOG_TIME} a
LATERAL VIEW expansionTags(a.tags) b as firstTag, secondTag
group by a.device_id, a.device_type, a.country, b.firstTag, b.secondTag
) t
group by t.country, t.firstTag, t.secondTag;

drop table tmp_device_country_${LOG_TIME};
drop table tmp_country_tag_${LOG_TIME};
drop table tmp_adn_install_${LOG_TIME};
drop table tmp_adn_request_${LOG_TIME};
drop table tmp_ga_install_${LOG_TIME};
"
hive_cmd "$hql"
if [ $? -ne 0 ];then
  exit 255
fi