use emr_doppler_v1;

set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set mapred.max.split.size=536870912;
set mapred.min.split.size.per.node=536870912;
set mapred.min.split.size.per.rack=536870912;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=3072;
set hive.map.aggr = true;
set hive.map.aggr = true;
set hive.optimize.skewjoin=true;

-- ALTER TABLE emr_doppler_v1.player_checkpoint_daily SET LOCATION 's3://mob-emr-test/dataplatform/emr/player_checkpoint_daily';

-- ALTER TABLE emr_doppler_v1.player_checkpoint SET LOCATION 's3://mob-emr-test/dataplatform/emr/player_checkpoint';

ALTER TABLE player_checkpoint_daily
    ADD IF NOT EXISTS PARTITION (dt = '${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/${pre1_dateslash}/player_checkpoint';
ALTER TABLE player_checkpoint
    ADD IF NOT EXISTS PARTITION (dt = '${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/${pre1_dateslash}/player_checkpoint';

INSERT overwrite table player_checkpoint_7days
SELECT game_id,
       player_id,
       SUM(revenue)              AS revenue_week,
       SUM(sum_session_length)   AS sum_session_length_week,
       SUM(count_session_length) AS count_session_length_week,
       count(*)                  AS days_played_week
FROM player_checkpoint_daily
WHERE dt <= '${part_pre1_path}'
  AND dt > '${part_pre8_path}'
GROUP BY game_id, player_id;

INSERT overwrite table player_checkpoint_30days
SELECT game_id,
       player_id,
       SUM(revenue)              AS revenue_month,
       SUM(sum_session_length)   AS sum_session_length_month,
       SUM(count_session_length) AS count_session_length_month,
       count(*)                  AS days_played_month
FROM player_checkpoint_daily
WHERE dt <= '${part_pre1_path}'
  AND dt > '${part_pre31_path}'
GROUP BY game_id, player_id;

INSERT overwrite table player_checkpoint_tmp
select t1.game_id,
       t1.checkpoint,
       t1.player_id,
       t1.user_id,
       t1.is_payer,
       t1.is_day_payer,
       t1.first_payed,
       t1.installed,
       t1.install_publisher,
       t1.install_site,
       t1.install_campaign,
       t1.install_adgroup,
       t1.install_ad,
       t1.install_keyword,
       t1.is_acquired,
       t1.revenue,
       t1.count_transaction_num,
       t1.min_transaction_num,
       t1.max_transaction_num,
       t1.country_code,
       t1.facebook_id,
       t1.ios_id,
       t1.ios_idfa,
       t1.ios_idfv,
       t1.android_id,
       t1.google_aid,
       t1.googleplus_id,
       t1.is_ad_tracking_limited,
       t1.is_logged_gamecenter,
       t1.is_logged_googleplay,
       t1.gender,
       t1.birth_year,
       t1.sdk_version,
       t1.engine_version,
       t1.os_version,
       t1.manufacturer,
       t1.device,
       t1.platform,
       t1.build,
       t1.connection_type,
       t1.is_jailbroken,
       t1.count_session_length,
       t1.min_session_length,
       t1.max_session_length,
       t1.sum_session_length,
       t1.sum2_session_length,
       t1.min_session_num,
       t1.max_session_num,
       t1.count_event,
       t1.count_transaction,
       t1.count_debug_error,
       t1.count_info_error,
       t1.count_warning_error,
       t1.count_error_error,
       t1.count_critical_error,
       t7.revenue_week,
       t30.revenue_month,
       t7.sum_session_length_week,
       t30.sum_session_length_month,
       t7.count_session_length_week,
       t30.count_session_length_month,
       t7.days_played_week,
       t30.days_played_month
from player_checkpoint_${date_str_undline} t1
         join player_checkpoint_7days t7 on (t1.game_id = t7.game_id and t1.player_id = t7.player_id)
         join player_checkpoint_30days t30 on (t1.game_id = t30.game_id and t1.player_id = t30.player_id);

insert overwrite table player_checkpoint partition (dt = '${part_pre1_path}')
select coalesce(t2.game_id, t1.game_id),
       coalesce(t2.checkpoint, t1.checkpoint),
       coalesce(t2.player_id, t1.player_id),
       coalesce(t2.user_id, t1.user_id),
       coalesce(t2.is_payer, t1.is_payer),
       coalesce(t2.is_day_payer, t1.is_day_payer),
       coalesce(t2.first_payed, t1.first_payed),
       coalesce(t2.installed, t1.installed),
       coalesce(t2.install_publisher, t1.install_publisher),
       coalesce(t2.install_site, t1.install_site),
       coalesce(t2.install_campaign, t1.install_campaign),
       coalesce(t2.install_adgroup, t1.install_adgroup),
       coalesce(t2.install_ad, t1.install_ad),
       coalesce(t2.install_keyword, t1.install_keyword),
       coalesce(t2.is_acquired, t1.is_acquired),
       coalesce(t2.revenue, t1.revenue),
       coalesce(t2.count_transaction_num, t1.count_transaction_num),
       coalesce(t2.min_transaction_num, t1.min_transaction_num),
       coalesce(t2.max_transaction_num, t1.max_transaction_num),
       coalesce(t2.country_code, t1.country_code),
       coalesce(t2.facebook_id, t1.facebook_id),
       coalesce(t2.ios_id, t1.ios_id),
       coalesce(t2.ios_idfa, t1.ios_idfa),
       coalesce(t2.ios_idfv, t1.ios_idfv),
       coalesce(t2.android_id, t1.android_id),
       coalesce(t2.google_aid, t1.google_aid),
       coalesce(t2.googleplus_id, t1.googleplus_id),
       coalesce(t2.is_ad_tracking_limited, t1.is_ad_tracking_limited),
       coalesce(t2.is_logged_gamecenter, t1.is_logged_gamecenter),
       coalesce(t2.is_logged_googleplay, t1.is_logged_googleplay),
       coalesce(t2.gender, t1.gender),
       coalesce(t2.birth_year, t1.birth_year),
       coalesce(t2.sdk_version, t1.sdk_version),
       coalesce(t2.engine_version, t1.engine_version),
       coalesce(t2.os_version, t1.os_version),
       coalesce(t2.manufacturer, t1.manufacturer),
       coalesce(t2.device, t1.device),
       coalesce(t2.platform, t1.platform),
       coalesce(t2.build, t1.build),
       coalesce(t2.connection_type, t1.connection_type),
       coalesce(t2.is_jailbroken, t1.is_jailbroken),
       coalesce(t2.count_session_length, t1.count_session_length),
       coalesce(t2.min_session_length, t1.min_session_length),
       coalesce(t2.max_session_length, t1.max_session_length),
       coalesce(t2.sum_session_length, t1.sum_session_length),
       coalesce(t2.sum2_session_length, t1.sum2_session_length),
       coalesce(t2.min_session_num, t1.min_session_num),
       coalesce(t2.max_session_num, t1.max_session_num),
       coalesce(t2.count_event, t1.count_event),
       coalesce(t2.count_transaction, t1.count_transaction),
       coalesce(t2.count_debug_error, t1.count_debug_error),
       coalesce(t2.count_info_error, t1.count_info_error),
       coalesce(t2.count_warning_error, t1.count_warning_error),
       coalesce(t2.count_error_error, t1.count_error_error),
       coalesce(t2.count_critical_error, t1.count_critical_error),
       coalesce(t2.revenue_week, t1.revenue_week),
       coalesce(t2.revenue_month, t1.revenue_month),
       coalesce(t2.sum_session_length_week, t1.sum_session_length_week),
       coalesce(t2.sum_session_length_month, t1.sum_session_length_month),
       coalesce(t2.count_session_length_week, t1.count_session_length_week),
       coalesce(t2.count_session_length_month, t1.count_session_length_month),
       coalesce(t2.days_played_week, t1.days_played_week),
       coalesce(t2.days_played_month, t1.days_played_month)
from (select * from player_checkpoint where dt = '${part_pre2_path}') t1
         full join player_checkpoint_tmp t2 on (t1.game_id = t2.game_id and t1.player_id = t2.player_id);

INSERT overwrite TABLE tmp_daily_export_player_checkpoint_device_stats
SELECT device_id,
       platform,
       country,
       first_seen,
       num_transactions,
       total_games,
       action_games,
       arcade_games,
       puzzle_games,
       adventure_games,
       casual_games,
       simulation_games,
       strategy_games,
       educational_games,
       family_games,
       role_playing_games,
       sessions_count_30d,
       cast(sum_sessions_30d as float) / cast(sessions_count_30d as float) as session_length_30d,
       sessions_count_7d,
       cast(sum_sessions_7d as float) / cast(sessions_count_7d as float)   as session_length_7d,
       sessions_count,
       cast(sum_sessions as float) / cast(sessions_count as float)         as session_length,
       facebook_id,
       googleplus_id,
       gender,
       birth_year,
       revenue_week,
       revenue_month,
       device_purchase
FROM (
         SELECT case
                    when ios_idfa IS NOT NULL then ios_idfa
                    when google_aid IS NOT NULL then google_aid
                    end                                                                             as device_id,
                case
                    when ios_idfa IS NOT NULL then 'iOS'
                    when google_aid IS NOT NULL then 'Android'
                    end                                                                             as platform,
                MIN(country_code)                                                                   as country,
                MIN(checkpoint)                                                                     as first_seen,
                SUM(count_transaction)                                                              as num_transactions,
                COUNT(DISTINCT p.game_id)                                                           as total_games,
                COUNT(DISTINCT case when c.category = 'Action' then p.game_id end)                  as action_games,
                COUNT(DISTINCT case when c.category = 'Arcade' then p.game_id end)                  as arcade_games,
                COUNT(DISTINCT case when c.category = 'Puzzle' then p.game_id end)                  as puzzle_games,
                COUNT(DISTINCT case when c.category = 'Adventure' then p.game_id end)               as adventure_games,
                COUNT(DISTINCT case when c.category = 'Casual' then p.game_id end)                  as casual_games,
                COUNT(DISTINCT case when c.category = 'Simulation' then p.game_id end)              as simulation_games,
                COUNT(DISTINCT case when c.category = 'Strategy' then p.game_id end)                as strategy_games,
                COUNT(DISTINCT case when c.category = 'Educational' then p.game_id end)             as educational_games,
                COUNT(DISTINCT case when c.category = 'Family' then p.game_id end)                  as family_games,
                COUNT(DISTINCT case when c.category = 'Role Playing' then p.game_id end)            as role_playing_games,
                SUM(case
                        when DATEDIFF('${cur_date}', checkpoint) <= 30
                            then count_session_length end)                                          as sessions_count_30d,
                SUM(case
                        when DATEDIFF('${cur_date}', checkpoint) <= 30
                            then sum_session_length end)                                            as sum_sessions_30d,
                SUM(case
                        when DATEDIFF('${cur_date}', checkpoint) <= 7
                            then count_session_length end)                                          as sessions_count_7d,
                SUM(case when DATEDIFF('${cur_date}', checkpoint) <= 7 then sum_session_length end) as sum_sessions_7d,
                SUM(count_session_length)                                                           as sessions_count,
                SUM(sum_session_length)                                                             as sum_sessions,
                MIN(facebook_id)                                                                    as facebook_id,
                MIN(googleplus_id)                                                                  as googleplus_id,
                MIN(gender)                                                                         as gender,
                MIN(birth_year)                                                                     as birth_year,
                MIN(revenue_week)                                                                   as revenue_week,
                MIN(revenue_month)                                                                  as revenue_month,
                SUM(case when is_day_payer = '01' then 1 when is_day_payer = 'false' then 0 end)    as device_purchase
         FROM player_checkpoint p
                  JOIN game_categories c
                       ON p.game_id = c.game_id
         WHERE p.dt = '${part_pre1_path}'
           AND cast(p.checkpoint as string) = '${part_pre1_dash}'
           AND (ios_idfa IS NOT NULL OR google_aid IS NOT NULL)
         GROUP BY case
                      when ios_idfa IS NOT NULL then ios_idfa
                      when google_aid IS NOT NULL then google_aid
                      end,
                  case
                      when ios_idfa IS NOT NULL then 'iOS'
                      when google_aid IS NOT NULL then 'Android'
                      end
     ) as p1;

INSERT overwrite TABLE tmp_daily_export_game_fields
SELECT device_id,
       AVG(cast(game_sessions as float)) as average_sessions_per_app,
       AVG(cast(game_days as float))     as average_days_per_app
FROM (SELECT case
                 when ios_idfa IS NOT NULL then ios_idfa
                 when google_aid IS NOT NULL then google_aid
                 end                    as device_id,
             game_id,
             SUM(count_session_length)  as game_sessions,
             COUNT(DISTINCT checkpoint) as game_days
      FROM player_checkpoint p
      WHERE dt = '${part_pre1_path}'
        and cast(p.checkpoint as string) = '${part_pre1_dash}'
      GROUP BY case
                   when ios_idfa IS NOT NULL then ios_idfa
                   when google_aid IS NOT NULL then google_aid
                   end,
               game_id
     ) p
GROUP BY device_id;

DROP TABLE player_device_metrics_${date_str_undline};
CREATE EXTERNAL TABLE `player_device_metrics_${date_str_undline}`
(
    `device_id`                string,
    `platform`                 string,
    `country`                  string,
    `first_seen`               date,
    `num_transactions`         bigint,
    `total_games`              bigint,
    `action_games`             bigint,
    `arcade_games`             bigint,
    `puzzle_games`             bigint,
    `adventure_games`          bigint,
    `casual_games`             bigint,
    `simulation_games`         bigint,
    `strategy_games`           bigint,
    `educational_games`        bigint,
    `family_games`             bigint,
    `role_playing_games`       bigint,
    `sessions_count_30d`       bigint,
    `session_length_30d`       double,
    `sessions_count_7d`        bigint,
    `session_length_7d`        double,
    `sessions_count`           bigint,
    `session_length`           double,
    `facebook_id`              string,
    `googleplus_id`            string,
    `gender`                   string,
    `birth_year`               string,
    `revenue_week`             float,
    `revenue_month`            float,
    `average_sessions_per_app` double,
    `average_days_per_app`     double,
    `device_purchase`          bigint
)
    ROW FORMAT SERDE
        'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
        WITH SERDEPROPERTIES (
        'field.delim' = '|',
        'serialization.format' = '|')
    STORED AS INPUTFORMAT
        'org.apache.hadoop.mapred.TextInputFormat'
        OUTPUTFORMAT
            'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
    LOCATION
        's3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/${pre1_dateslash}/device-metrics';

insert overwrite table player_device_metrics_${date_str_undline}
SELECT a.device_id,
       a.platform,
       a.country,
       a.first_seen,
       a.num_transactions,
       a.total_games,
       a.action_games,
       a.arcade_games,
       a.puzzle_games,
       a.adventure_games,
       a.casual_games,
       a.simulation_games,
       a.strategy_games,
       a.educational_games,
       a.family_games,
       a.role_playing_games,
       a.sessions_count_30d,
       a.session_length_30d,
       a.sessions_count_7d,
       a.session_length_7d,
       a.sessions_count,
       a.session_length,
       a.facebook_id,
       a.googleplus_id,
       a.gender,
       a.birth_year,
       a.revenue_week,
       a.revenue_month,
       d.average_sessions_per_app,
       d.average_days_per_app,
       a.device_purchase
FROM tmp_daily_export_player_checkpoint_device_stats a
         JOIN tmp_daily_export_game_fields d
              ON d.device_id = a.device_id;

-- ALTER TABLE player_store_game SET LOCATION 's3://mob-emr-test/dataplatform/emr/store_game';

INSERT OVERWRITE TABLE emr_doppler.all_tmp_game_store
SELECT game_id, store_id
FROM emr_doppler.player_store_game WHERE store_id IS NOT NULL;

DROP TABLE player_installation_list_${date_str_undline};
CREATE EXTERNAL TABLE `player_installation_list_${date_str_undline}`
(
    `device_id`    string,
    `platform`     string,
    `package_name` string,
    `store_link`   string
)
    ROW FORMAT SERDE
        'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
        WITH SERDEPROPERTIES (
        'field.delim' = '|',
        'serialization.format' = '|')
    STORED AS INPUTFORMAT
        'org.apache.hadoop.mapred.TextInputFormat'
        OUTPUTFORMAT
            'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
    LOCATION
        's3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/${pre1_dateslash}/installation-list';

insert overwrite table player_installation_list_${date_str_undline}
SELECT case
           when ios_idfa IS NOT NULL then ios_idfa
           when google_aid IS NOT NULL then google_aid
           end  as device_id,
       platform,
       store_id as package_name,
       case
           when LOWER(platform) = 'android' then 'https://play.google.com/store/apps/details?id=' || store_id
           when LOWER(platform) = 'ios' then 'https://itunes.apple.com/app/id' || store_id
           end  as store_link
FROM player_checkpoint pc,
     emr_doppler.all_tmp_game_store gs
WHERE not (ios_idfa is null and google_aid is null)
  AND pc.dt = '${part_pre1_path}'
  AND cast(pc.checkpoint as string) = '${part_pre1_dash}'
  AND pc.game_id = gs.game_id
  AND not gs.store_id is null
GROUP BY case
             when ios_idfa IS NOT NULL then ios_idfa
             when google_aid IS NOT NULL then google_aid
             end,
         platform,
         gs.store_id;

ALTER TABLE player_checkpoint_ga
    SET LOCATION 's3://mob-emr-test/dataplatform/datawarehourse/ga/player_checkpoint_ga';

ALTER TABLE player_checkpoint_ga
    ADD IF NOT EXISTS PARTITION (dt = '${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/datawarehourse/ga/player_checkpoint_ga/${part_pre1_path}';

insert overwrite table player_checkpoint_ga partition (dt = '${part_pre1_path}')
select coalesce(max(ios_idfa), max(google_aid), max(android_id)) as dev_id,
       max(device)                                               as model,
       max(country_code)                                         as country,
       max(os_version)                                           as osversion
from player_checkpoint pc
where pc.dt = '${part_pre1_path}'
  AND cast(pc.checkpoint as string) = '${part_pre1_dash}'
group by coalesce(ios_idfa, google_aid, android_id);

DROP TABLE dau_device_export_${date_str_undline};
create external table IF NOT EXISTS dau_device_export_${date_str_undline}
(
    device_id  string,
    platform   string,
    android_id string
)
    row format delimited fields terminated by '|'
    stored as textfile
    location 's3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/${pre1_dateslash}/dau-device-data-export';

insert overwrite table dau_device_export_${date_str_undline}
SELECT COALESCE(ios_idfa, google_aid)                       as device_id,
       platform,
       case when google_aid is not null then android_id end as android_id
FROM player_checkpoint pc
where pc.dt = '${part_pre1_path}'
  AND cast(pc.checkpoint as string) = '${part_pre1_dash}'
  and not (ios_idfa is null and google_aid is null);

drop table dau_device_export_${date_str_undline};
DROP TABLE player_device_metrics_${date_str_undline};
DROP table player_installation_list_${date_str_undline};