use emr_doppler_v1; set hive.exec.compress.output=true; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; set mapred.max.split.size=536870912; set mapred.min.split.size.per.node=536870912; set mapred.min.split.size.per.rack=536870912; set mapreduce.map.memory.mb=2048; set mapreduce.reduce.memory.mb=3072; set hive.map.aggr = true; set hive.map.aggr = true; set hive.optimize.skewjoin=true; -- ALTER TABLE emr_doppler_v1.player_checkpoint_daily SET LOCATION 's3://mob-emr-test/dataplatform/emr/player_checkpoint_daily'; -- ALTER TABLE emr_doppler_v1.player_checkpoint SET LOCATION 's3://mob-emr-test/dataplatform/emr/player_checkpoint'; ALTER TABLE player_checkpoint_daily ADD IF NOT EXISTS PARTITION (dt = '${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/${pre1_dateslash}/player_checkpoint'; ALTER TABLE player_checkpoint ADD IF NOT EXISTS PARTITION (dt = '${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/${pre1_dateslash}/player_checkpoint'; INSERT overwrite table player_checkpoint_7days SELECT game_id, player_id, SUM(revenue) AS revenue_week, SUM(sum_session_length) AS sum_session_length_week, SUM(count_session_length) AS count_session_length_week, count(*) AS days_played_week FROM player_checkpoint_daily WHERE dt <= '${part_pre1_path}' AND dt > '${part_pre8_path}' GROUP BY game_id, player_id; INSERT overwrite table player_checkpoint_30days SELECT game_id, player_id, SUM(revenue) AS revenue_month, SUM(sum_session_length) AS sum_session_length_month, SUM(count_session_length) AS count_session_length_month, count(*) AS days_played_month FROM player_checkpoint_daily WHERE dt <= '${part_pre1_path}' AND dt > '${part_pre31_path}' GROUP BY game_id, player_id; INSERT overwrite table player_checkpoint_tmp select t1.game_id, t1.checkpoint, t1.player_id, t1.user_id, t1.is_payer, t1.is_day_payer, t1.first_payed, t1.installed, t1.install_publisher, t1.install_site, t1.install_campaign, t1.install_adgroup, t1.install_ad, t1.install_keyword, t1.is_acquired, t1.revenue, t1.count_transaction_num, t1.min_transaction_num, t1.max_transaction_num, t1.country_code, t1.facebook_id, t1.ios_id, t1.ios_idfa, t1.ios_idfv, t1.android_id, t1.google_aid, t1.googleplus_id, t1.is_ad_tracking_limited, t1.is_logged_gamecenter, t1.is_logged_googleplay, t1.gender, t1.birth_year, t1.sdk_version, t1.engine_version, t1.os_version, t1.manufacturer, t1.device, t1.platform, t1.build, t1.connection_type, t1.is_jailbroken, t1.count_session_length, t1.min_session_length, t1.max_session_length, t1.sum_session_length, t1.sum2_session_length, t1.min_session_num, t1.max_session_num, t1.count_event, t1.count_transaction, t1.count_debug_error, t1.count_info_error, t1.count_warning_error, t1.count_error_error, t1.count_critical_error, t7.revenue_week, t30.revenue_month, t7.sum_session_length_week, t30.sum_session_length_month, t7.count_session_length_week, t30.count_session_length_month, t7.days_played_week, t30.days_played_month from player_checkpoint_${date_str_undline} t1 join player_checkpoint_7days t7 on (t1.game_id = t7.game_id and t1.player_id = t7.player_id) join player_checkpoint_30days t30 on (t1.game_id = t30.game_id and t1.player_id = t30.player_id); insert overwrite table player_checkpoint partition (dt = '${part_pre1_path}') select coalesce(t2.game_id, t1.game_id), coalesce(t2.checkpoint, t1.checkpoint), coalesce(t2.player_id, t1.player_id), coalesce(t2.user_id, t1.user_id), coalesce(t2.is_payer, t1.is_payer), coalesce(t2.is_day_payer, t1.is_day_payer), coalesce(t2.first_payed, t1.first_payed), coalesce(t2.installed, t1.installed), coalesce(t2.install_publisher, t1.install_publisher), coalesce(t2.install_site, t1.install_site), coalesce(t2.install_campaign, t1.install_campaign), coalesce(t2.install_adgroup, t1.install_adgroup), coalesce(t2.install_ad, t1.install_ad), coalesce(t2.install_keyword, t1.install_keyword), coalesce(t2.is_acquired, t1.is_acquired), coalesce(t2.revenue, t1.revenue), coalesce(t2.count_transaction_num, t1.count_transaction_num), coalesce(t2.min_transaction_num, t1.min_transaction_num), coalesce(t2.max_transaction_num, t1.max_transaction_num), coalesce(t2.country_code, t1.country_code), coalesce(t2.facebook_id, t1.facebook_id), coalesce(t2.ios_id, t1.ios_id), coalesce(t2.ios_idfa, t1.ios_idfa), coalesce(t2.ios_idfv, t1.ios_idfv), coalesce(t2.android_id, t1.android_id), coalesce(t2.google_aid, t1.google_aid), coalesce(t2.googleplus_id, t1.googleplus_id), coalesce(t2.is_ad_tracking_limited, t1.is_ad_tracking_limited), coalesce(t2.is_logged_gamecenter, t1.is_logged_gamecenter), coalesce(t2.is_logged_googleplay, t1.is_logged_googleplay), coalesce(t2.gender, t1.gender), coalesce(t2.birth_year, t1.birth_year), coalesce(t2.sdk_version, t1.sdk_version), coalesce(t2.engine_version, t1.engine_version), coalesce(t2.os_version, t1.os_version), coalesce(t2.manufacturer, t1.manufacturer), coalesce(t2.device, t1.device), coalesce(t2.platform, t1.platform), coalesce(t2.build, t1.build), coalesce(t2.connection_type, t1.connection_type), coalesce(t2.is_jailbroken, t1.is_jailbroken), coalesce(t2.count_session_length, t1.count_session_length), coalesce(t2.min_session_length, t1.min_session_length), coalesce(t2.max_session_length, t1.max_session_length), coalesce(t2.sum_session_length, t1.sum_session_length), coalesce(t2.sum2_session_length, t1.sum2_session_length), coalesce(t2.min_session_num, t1.min_session_num), coalesce(t2.max_session_num, t1.max_session_num), coalesce(t2.count_event, t1.count_event), coalesce(t2.count_transaction, t1.count_transaction), coalesce(t2.count_debug_error, t1.count_debug_error), coalesce(t2.count_info_error, t1.count_info_error), coalesce(t2.count_warning_error, t1.count_warning_error), coalesce(t2.count_error_error, t1.count_error_error), coalesce(t2.count_critical_error, t1.count_critical_error), coalesce(t2.revenue_week, t1.revenue_week), coalesce(t2.revenue_month, t1.revenue_month), coalesce(t2.sum_session_length_week, t1.sum_session_length_week), coalesce(t2.sum_session_length_month, t1.sum_session_length_month), coalesce(t2.count_session_length_week, t1.count_session_length_week), coalesce(t2.count_session_length_month, t1.count_session_length_month), coalesce(t2.days_played_week, t1.days_played_week), coalesce(t2.days_played_month, t1.days_played_month) from (select * from player_checkpoint where dt = '${part_pre2_path}') t1 full join player_checkpoint_tmp t2 on (t1.game_id = t2.game_id and t1.player_id = t2.player_id); INSERT overwrite TABLE tmp_daily_export_player_checkpoint_device_stats SELECT device_id, platform, country, first_seen, num_transactions, total_games, action_games, arcade_games, puzzle_games, adventure_games, casual_games, simulation_games, strategy_games, educational_games, family_games, role_playing_games, sessions_count_30d, cast(sum_sessions_30d as float) / cast(sessions_count_30d as float) as session_length_30d, sessions_count_7d, cast(sum_sessions_7d as float) / cast(sessions_count_7d as float) as session_length_7d, sessions_count, cast(sum_sessions as float) / cast(sessions_count as float) as session_length, facebook_id, googleplus_id, gender, birth_year, revenue_week, revenue_month, device_purchase FROM ( SELECT case when ios_idfa IS NOT NULL then ios_idfa when google_aid IS NOT NULL then google_aid end as device_id, case when ios_idfa IS NOT NULL then 'iOS' when google_aid IS NOT NULL then 'Android' end as platform, MIN(country_code) as country, MIN(checkpoint) as first_seen, SUM(count_transaction) as num_transactions, COUNT(DISTINCT p.game_id) as total_games, COUNT(DISTINCT case when c.category = 'Action' then p.game_id end) as action_games, COUNT(DISTINCT case when c.category = 'Arcade' then p.game_id end) as arcade_games, COUNT(DISTINCT case when c.category = 'Puzzle' then p.game_id end) as puzzle_games, COUNT(DISTINCT case when c.category = 'Adventure' then p.game_id end) as adventure_games, COUNT(DISTINCT case when c.category = 'Casual' then p.game_id end) as casual_games, COUNT(DISTINCT case when c.category = 'Simulation' then p.game_id end) as simulation_games, COUNT(DISTINCT case when c.category = 'Strategy' then p.game_id end) as strategy_games, COUNT(DISTINCT case when c.category = 'Educational' then p.game_id end) as educational_games, COUNT(DISTINCT case when c.category = 'Family' then p.game_id end) as family_games, COUNT(DISTINCT case when c.category = 'Role Playing' then p.game_id end) as role_playing_games, SUM(case when DATEDIFF('${cur_date}', checkpoint) <= 30 then count_session_length end) as sessions_count_30d, SUM(case when DATEDIFF('${cur_date}', checkpoint) <= 30 then sum_session_length end) as sum_sessions_30d, SUM(case when DATEDIFF('${cur_date}', checkpoint) <= 7 then count_session_length end) as sessions_count_7d, SUM(case when DATEDIFF('${cur_date}', checkpoint) <= 7 then sum_session_length end) as sum_sessions_7d, SUM(count_session_length) as sessions_count, SUM(sum_session_length) as sum_sessions, MIN(facebook_id) as facebook_id, MIN(googleplus_id) as googleplus_id, MIN(gender) as gender, MIN(birth_year) as birth_year, MIN(revenue_week) as revenue_week, MIN(revenue_month) as revenue_month, SUM(case when is_day_payer = '01' then 1 when is_day_payer = 'false' then 0 end) as device_purchase FROM player_checkpoint p JOIN game_categories c ON p.game_id = c.game_id WHERE p.dt = '${part_pre1_path}' AND cast(p.checkpoint as string) = '${part_pre1_dash}' AND (ios_idfa IS NOT NULL OR google_aid IS NOT NULL) GROUP BY case when ios_idfa IS NOT NULL then ios_idfa when google_aid IS NOT NULL then google_aid end, case when ios_idfa IS NOT NULL then 'iOS' when google_aid IS NOT NULL then 'Android' end ) as p1; INSERT overwrite TABLE tmp_daily_export_game_fields SELECT device_id, AVG(cast(game_sessions as float)) as average_sessions_per_app, AVG(cast(game_days as float)) as average_days_per_app FROM (SELECT case when ios_idfa IS NOT NULL then ios_idfa when google_aid IS NOT NULL then google_aid end as device_id, game_id, SUM(count_session_length) as game_sessions, COUNT(DISTINCT checkpoint) as game_days FROM player_checkpoint p WHERE dt = '${part_pre1_path}' and cast(p.checkpoint as string) = '${part_pre1_dash}' GROUP BY case when ios_idfa IS NOT NULL then ios_idfa when google_aid IS NOT NULL then google_aid end, game_id ) p GROUP BY device_id; DROP TABLE player_device_metrics_${date_str_undline}; CREATE EXTERNAL TABLE `player_device_metrics_${date_str_undline}` ( `device_id` string, `platform` string, `country` string, `first_seen` date, `num_transactions` bigint, `total_games` bigint, `action_games` bigint, `arcade_games` bigint, `puzzle_games` bigint, `adventure_games` bigint, `casual_games` bigint, `simulation_games` bigint, `strategy_games` bigint, `educational_games` bigint, `family_games` bigint, `role_playing_games` bigint, `sessions_count_30d` bigint, `session_length_30d` double, `sessions_count_7d` bigint, `session_length_7d` double, `sessions_count` bigint, `session_length` double, `facebook_id` string, `googleplus_id` string, `gender` string, `birth_year` string, `revenue_week` float, `revenue_month` float, `average_sessions_per_app` double, `average_days_per_app` double, `device_purchase` bigint ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'field.delim' = '|', 'serialization.format' = '|') STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 's3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/${pre1_dateslash}/device-metrics'; insert overwrite table player_device_metrics_${date_str_undline} SELECT a.device_id, a.platform, a.country, a.first_seen, a.num_transactions, a.total_games, a.action_games, a.arcade_games, a.puzzle_games, a.adventure_games, a.casual_games, a.simulation_games, a.strategy_games, a.educational_games, a.family_games, a.role_playing_games, a.sessions_count_30d, a.session_length_30d, a.sessions_count_7d, a.session_length_7d, a.sessions_count, a.session_length, a.facebook_id, a.googleplus_id, a.gender, a.birth_year, a.revenue_week, a.revenue_month, d.average_sessions_per_app, d.average_days_per_app, a.device_purchase FROM tmp_daily_export_player_checkpoint_device_stats a JOIN tmp_daily_export_game_fields d ON d.device_id = a.device_id; -- ALTER TABLE player_store_game SET LOCATION 's3://mob-emr-test/dataplatform/emr/store_game'; INSERT OVERWRITE TABLE emr_doppler.all_tmp_game_store SELECT game_id, store_id FROM emr_doppler.player_store_game WHERE store_id IS NOT NULL; DROP TABLE player_installation_list_${date_str_undline}; CREATE EXTERNAL TABLE `player_installation_list_${date_str_undline}` ( `device_id` string, `platform` string, `package_name` string, `store_link` string ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'field.delim' = '|', 'serialization.format' = '|') STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 's3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/${pre1_dateslash}/installation-list'; insert overwrite table player_installation_list_${date_str_undline} SELECT case when ios_idfa IS NOT NULL then ios_idfa when google_aid IS NOT NULL then google_aid end as device_id, platform, store_id as package_name, case when LOWER(platform) = 'android' then 'https://play.google.com/store/apps/details?id=' || store_id when LOWER(platform) = 'ios' then 'https://itunes.apple.com/app/id' || store_id end as store_link FROM player_checkpoint pc, emr_doppler.all_tmp_game_store gs WHERE not (ios_idfa is null and google_aid is null) AND pc.dt = '${part_pre1_path}' AND cast(pc.checkpoint as string) = '${part_pre1_dash}' AND pc.game_id = gs.game_id AND not gs.store_id is null GROUP BY case when ios_idfa IS NOT NULL then ios_idfa when google_aid IS NOT NULL then google_aid end, platform, gs.store_id; ALTER TABLE player_checkpoint_ga SET LOCATION 's3://mob-emr-test/dataplatform/datawarehourse/ga/player_checkpoint_ga'; ALTER TABLE player_checkpoint_ga ADD IF NOT EXISTS PARTITION (dt = '${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/datawarehourse/ga/player_checkpoint_ga/${part_pre1_path}'; insert overwrite table player_checkpoint_ga partition (dt = '${part_pre1_path}') select coalesce(max(ios_idfa), max(google_aid), max(android_id)) as dev_id, max(device) as model, max(country_code) as country, max(os_version) as osversion from player_checkpoint pc where pc.dt = '${part_pre1_path}' AND cast(pc.checkpoint as string) = '${part_pre1_dash}' group by coalesce(ios_idfa, google_aid, android_id); DROP TABLE dau_device_export_${date_str_undline}; create external table IF NOT EXISTS dau_device_export_${date_str_undline} ( device_id string, platform string, android_id string ) row format delimited fields terminated by '|' stored as textfile location 's3://mob-emr-test/dataplatform/datawarehourse/dmp/daily_export/${pre1_dateslash}/dau-device-data-export'; insert overwrite table dau_device_export_${date_str_undline} SELECT COALESCE(ios_idfa, google_aid) as device_id, platform, case when google_aid is not null then android_id end as android_id FROM player_checkpoint pc where pc.dt = '${part_pre1_path}' AND cast(pc.checkpoint as string) = '${part_pre1_dash}' and not (ios_idfa is null and google_aid is null); drop table dau_device_export_${date_str_undline}; DROP TABLE player_device_metrics_${date_str_undline}; DROP table player_installation_list_${date_str_undline};