#!/usr/bin/env bash startDay='20170803' stopDay='20170805' while [ $startDay -le $stopDay ];do date_path=${startDay:0:4}/${startDay:4:2}/${startDay:6:2} echo $date_path inputPath="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/{tmp/3s_install_daily/${date_path}/,adserver_packageName/${date_path}/,etl_adn_install_daily/${date_path}/,etl_adn_sdk_request_daily/${date_path}/,etl_dsp_request_daily/${date_path}/,ods_ga_device_daily/${date_path}/,ods_ga_install_daily/${date_path}/}" installPath="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dm_install_list/2017/08/08/*" outputPath="s3://mob-emr-test/feng.liang/weightGame/$date_path" hadoop fs -rm -r $outputPath spark-submit --class mobvista.prd.main.WeightGame \ --master yarn --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../DMP.jar "$inputPath" "$installPath" "$outputPath" if [ $? -ne 0 ];then echo "$startDay fail" exit fi startDay=`date -d"$startDay 1 days" +"%Y%m%d"` done create_hql=" use dwh; drop table tmp_active_info; create external table tmp_active_info ( device_id string, device_type string ) partitioned by (dt string) row format delimited fields terminated by '\t' location 's3://mob-emr-test/feng.liang/weightGame' ; alter table tmp_active_info add partition(dt='20170801') location 's3://mob-emr-test/feng.liang/weightGame/2017/08/01'; alter table tmp_active_info add partition(dt='20170802') location 's3://mob-emr-test/feng.liang/weightGame/2017/08/02'; alter table tmp_active_info add partition(dt='20170803') location 's3://mob-emr-test/feng.liang/weightGame/2017/08/03'; alter table tmp_active_info add partition(dt='20170804') location 's3://mob-emr-test/feng.liang/weightGame/2017/08/04'; alter table tmp_active_info add partition(dt='20170805') location 's3://mob-emr-test/feng.liang/weightGame/2017/08/05'; " hive -e "$create_hql" if [ $? -ne 0 ];then exit fi hql=" user dwh; insert overwrite directory 's3://mob-emr-test/feng.liang/weightGameDAU' SELECT t.dt, t.device_type, t.country, count(1) AS dau FROM ( SELECT a.dt, a.device_type, b.country, a.device_id FROM ( SELECT * FROM tmp_active_info t WHERE t.dt >= '20170801' AND t.dt <= '20170805' ) a LEFT OUTER JOIN ( SELECT * FROM ods_dmp_device_total t WHERE t.dt = '20170810' ) b ON a.device_id = b.device_id AND a.device_type = b.device_type GROUP BY a.dt, a.device_type, b.country, a.device_id ) t GROUP BY t.dt, t.device_type, t.country; " hive -e "$hql" if [ $? -ne 0 ];then exit fi