Commit d38eb287 by mengdongxing

Upload new file

parent 16472c5a
-- --------------------------------------------------------
-- Host: etl.cma5jkozme68.rds.cn-north-1.amazonaws.com.cn
-- Server version: 5.6.27-log - MySQL Community Server (GPL)
-- Server OS: Linux
-- HeidiSQL version: 9.3.0.4984
-- --------------------------------------------------------
-- Save the client session state, then force settings suitable for a dump
-- reload: utf8mb4 client charset, foreign-key checks off (so tables can be
-- restored in any order), and NO_AUTO_VALUE_ON_ZERO (so an explicit id of 0
-- would be inserted literally rather than replaced by an auto value).
-- The /*!NNNNN ... */ wrappers are MySQL versioned comments: they execute
-- only on servers at or above the given version and are ignored elsewhere.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET NAMES utf8mb4 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
-- Exported database structure for `tkdmchange`.
-- NOTE(review): the database default charset is latin1 while every table
-- below uses utf8 and the connection is utf8mb4 -- presumably this mirrors
-- the source server's state; confirm before creating new tables that would
-- inherit the latin1 default.
CREATE DATABASE IF NOT EXISTS `tkdmchange` /*!40100 DEFAULT CHARACTER SET latin1 */;
USE `tkdmchange`;
-- Structure for table tkdmchange.engine_baseetl: "base" layer ETL job
-- definitions. `template` holds a parameterised Hive script run at the
-- cadence given by `freq`; `is_run` flags whether the job is active.
CREATE TABLE IF NOT EXISTS `engine_baseetl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`title` varchar(256) NOT NULL,
`description` longtext NOT NULL,
`template` longtext NOT NULL,
`added_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`modify_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`freq` char(50) NOT NULL DEFAULT 'day',
`is_run` tinyint(4) NOT NULL DEFAULT '1',
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=16 DEFAULT CHARSET=utf8;
-- Exporting data for table tkdmchange.engine_baseetl: 4 rows.
-- Intentional unfiltered DELETE: the dump replaces the table's entire
-- contents with the rows below.
DELETE FROM `engine_baseetl`;
/*!40000 ALTER TABLE `engine_baseetl` DISABLE KEYS */;
-- Rows 12-15: daily Hive jobs (active-detail, app-summary, payment-detail,
-- retention-summary). `$ds` is a date placeholder substituted at run time
-- by the scheduler; the \r\n sequences are stored verbatim in the data.
INSERT INTO `engine_baseetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(12, 'tkdm_data_active_detial_day', 'data', 'set hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\nset mapreduce.output.fileoutputformat.compress=true;\r\nset mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000;\r\nset mapred.min.split.size.per.rack=256000000;\r\nset hive.merge.mapfiles=true;\r\nset hive.merge.mapredfiles=true;\r\nset hive.merge.size.per.task=256000000;\r\nset hive.merge.smallfiles.avgsize=5000000;\r\nset mapred.task.timeout=60000000;\r\nset hive.exec.compress.intermediate=true;\r\nset hive.skewjoin.key=10000;\r\nset hive.optimize.skewjoin=true;\r\nset hive.merge.mapredfiles=true;\r\nset hive.groupby.skewindata=true;\r\n\r\n\r\ninsert overwrite table tkdm.tkdm_data_active_detial_day partition(ds=\'$ds\')\r\nselect /*+mapjoin(b)*/ a.appid,\r\n a.deviceid,\r\n a.cid,\r\n a.ins_date,\r\n nvl(b.category_id,-1) as category_id,\r\n nvl(b.isgame,-1) as isgame,\r\n nvl(b.platform,\'-1\') as platform\r\nfrom \r\n\r\n(select appid,\r\n deviceid,\r\n cid,\r\n to_date(ins_datetime) as ins_date\r\n from tkio_v2.tkio_mid_install_day\r\n where ds=\'$ds\'\r\n) a \r\nleft join\r\n\r\n(select appid,\r\n category_id,\r\n isgame,\r\n platform\r\n from tkio_v2.tkio_dp_app_info\r\n where ds=\'$ds\'\r\n) b on a.appid=b.appid\r\ncluster by deviceid,appid', '2017-03-09 09:34:57', '2017-03-21 01:18:03', 'day', 1);
INSERT INTO `engine_baseetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(13, 'tkdm_data_app_summary_day', 'data', 'set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; \r\nset mapred.min.split.size.per.rack=100000000; \r\nset mapred.min.split.size.per.node=100000000; \r\nset mapred.max.split.size=256000000; \r\nset hive.merge.mapfiles = true;\r\nset hive.merge.smallfiles.avgsize=16000000;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\ninsert overwrite table tkdm.tkdm_data_app_summary_day partition(ds=\'$ds\') \r\n\r\nselect /*+ mapjoin(b) */ a.appid,\r\n a.cid,\r\n a.num_click,\r\n a.dupnum_click_day,\r\n a.dupnum_click_all,\r\n a.num_install,\r\n round(a.num_install / a.dupnum_click_all,4) as ins_rate,\r\n a.dupnum_reged_day,\r\n a.dupnum_reged_mon,\r\n round(a.amt_income_new_user,2) as amt_income_new_user,\r\n a.num_payer_new_user,\r\n round(a.amt_income_all_user) as amt_income_all_user,\r\n a.num_payer_all_user,\r\n b.category_id,\r\n b.category_name,\r\n b.isgame\r\nfrom \r\n(select appid,\r\n cid,\r\n sum(num_click) as num_click,\r\n sum(dupnum_click_day) as dupnum_click_day,\r\n sum(dupnum_click_all) as dupnum_click_all,\r\n sum(num_install) as num_install,\r\n sum(dupnum_reged_day) as dupnum_reged_day,\r\n sum(dupnum_reged_mon) as dupnum_reged_mon,\r\n sum(amt_income_new_user) as amt_income_new_user,\r\n sum(num_payer_new_user) as num_payer_new_user,\r\n sum(amt_income_all_user) as amt_income_all_user,\r\n sum(num_payer_all_user) as num_payer_all_user\r\n from tkio_v2.tkio_rpt_channel_compare_day\r\n where dt=\'$ds\' and \r\n campaignid !=\'_default_\' and \r\n cid!=-2\r\n group by appid,cid\r\n having sum(dupnum_click_all) > 10\r\n) as a \r\n\r\njoin \r\n\r\n(select appid,\r\n category_id,\r\n category_name,\r\n isgame\r\n from tkio_v2.tkio_dp_app_info\r\n where ds =\'$ds\'\r\n) as b on a.appid=b.appid\r\n', '2017-03-09 09:34:57', '2017-03-21 01:18:12', 'day', 1);
INSERT INTO `engine_baseetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(14, 'tkdm_data_payment_detial_day', 'data', 'set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; \r\nset mapred.min.split.size.per.rack=100000000; \r\nset mapred.min.split.size.per.node=100000000; \r\nset mapred.max.split.size=256000000; \r\nset hive.merge.mapfiles = true;\r\nset hive.merge.smallfiles.avgsize=16000000;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\n\r\ninsert overwrite table tkdm.tkdm_data_payment_detial_day partition(ds=\'$ds\')\r\nselect appid,\r\n deviceid,\r\n ins_datetime,\r\n count(1) as num_payment_cnt,\r\n round(sum(amount),2) as amt_payment\r\n from tkio_v2.tkio_mid_pay_day\r\n where ds=\'$ds\' and \r\n deviceid!=\'00000000-0000-0000-0000-000000000000\'\r\n and deviceid!=\'\' and deviceid!=\'0\' and lower(paymenttype)!=\'free\'\r\n group by appid,deviceid,ins_datetime\r\n cluster by deviceid,appid', '2017-03-09 09:34:57', '2017-03-21 01:18:20', 'day', 1);
INSERT INTO `engine_baseetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(15, 'tkdm_data_retention_summary_day', 'data', 'set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; \r\nset mapred.min.split.size.per.rack=100000000; \r\nset mapred.min.split.size.per.node=100000000; \r\nset mapred.max.split.size=256000000; \r\nset hive.merge.mapfiles = true;\r\nset hive.merge.smallfiles.avgsize=16000000;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\n\r\ninsert overwrite table tkdm.tkdm_data_retention_summary_day partition(ds=\'$ds\') \r\n\r\nselect /*+ mapjoin(b) */ a.appid,\r\n a.cid,\r\n date_sub(\'$ds\',1) as ds_user,\r\n a.num_user,\r\n a.d1_user,\r\n a.d1_rate,\r\n b.category_id,\r\n b.category_name,\r\n b.isgame\r\nfrom \r\n(select appid,\r\n cid,\r\n sum(num_install) as num_user,\r\n sum(num_newuser_reten_d1) as d1_user,\r\n round(sum(num_newuser_reten_d1)/sum(num_install),4) as d1_rate\r\n from tkio_v2.tkio_rpt_channel_compare_day\r\n where ds between date_sub(\'$ds\',1) and \'$ds\' and \r\n dt=date_sub(\'$ds\',1)\r\n group by appid,cid\r\n having sum(num_install)>0\r\n) as a \r\n\r\njoin \r\n\r\n(select appid,\r\n category_id,\r\n category_name,\r\n isgame\r\n from tkio_v2.tkio_dp_app_info\r\n where ds =date_sub(\'$ds\',1)\r\n) as b on a.appid=b.appid', '2017-03-09 09:34:57', '2017-03-21 01:18:27', 'day', 1);
/*!40000 ALTER TABLE `engine_baseetl` ENABLE KEYS */;
-- Structure for table tkdmchange.engine_cumulativesetl: cumulative ETL jobs
-- whose templates roll yesterday's partition forward by full-joining it with
-- today's daily deltas (same schema as engine_baseetl).
CREATE TABLE IF NOT EXISTS `engine_cumulativesetl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`title` varchar(256) NOT NULL,
`description` longtext NOT NULL,
`template` longtext NOT NULL,
`added_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`modify_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`freq` char(50) NOT NULL DEFAULT 'day',
`is_run` tinyint(4) NOT NULL DEFAULT '1',
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=19 DEFAULT CHARSET=utf8;
-- Exporting data for table tkdmchange.engine_cumulativesetl: 2 rows.
-- Intentional unfiltered DELETE: the dump replaces the table's contents.
DELETE FROM `engine_cumulativesetl`;
/*!40000 ALTER TABLE `engine_cumulativesetl` DISABLE KEYS */;
-- Rows 17-18: device-level payment/active accumulation jobs.
-- NOTE(review): the first INSERT's template literal spans two physical lines
-- below -- the break falls INSIDE the quoted string, so these lines must not
-- be re-wrapped or separated by comments.
INSERT INTO `engine_cumulativesetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(17, 'tkdm_base_active_payment_info', 'base', 'set hive.skewjoin.key=10000;\r\nset hive.optimize.skewjoin=true;\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;\r\nset hive.optimize.bucketmapjoin = true;\r\nset hive.optimize.bucketmapjoin.sortedmerge = true;\r\nset hive.merge.mapredfiles=true;\r\nset hive.auto.convert.sortmerge.join=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\ninsert overwrite table tkdm.tkdm_base_active_payment_info partition(ds=\'$ds\')\r\nselect x.appid,\r\n x.deviceid,\r\n x.cid,\r\n x.first_ins_date,\r\n x.last_ins_date,\r\n x.category_id,\r\n x.isgame,\r\n x.platform,\r\n nvl(x.first_payment_ds,y.payment_ds) as first_payment_ds,\r\n nvl(y.payment_ds,x.last_amt_payment) as last_payment_ds,\r\n x.num_payment_cnt + y.num_payment_cnt as num_payment_cnt,\r\n x.num_payment_day + y.num_payment_day as num_payment_day,\r\n x.amt_payment +y.amt_payment as amt_payment,\r\n nvl(y.amt_payment,x.last_amt_payment) as last_amt_payment\r\n\r\nfrom \r\n(select nvl(a.deviceid,b.deviceid) as deviceid,\r\n nvl(a.appid,b.appid) as appid,\r\n nvl(b.cid,a.cid) as cid,\r\n nvl(a.first_ins_date,b.ins_date) as first_ins_date,\r\n nvl(b.ins_date,a.last_ins_date) as last_ins_date,\r\n nvl(b.category_id,a.category_id) as category_id,\r\n nvl(b.isgame,a.isgame) as isgame,\r\n nvl(b.platform,a.platform) as platform,\r\n a.first_payment_ds,\r\n a.last_payment_ds,\r\n nvl(a.num_payment_cnt,0) as num_payment_cnt,\r\n nvl(a.num_payment_day,0) as num_payment_day,\r\n nvl(a.amt_payment,0) amt_payment,\r\n nvl(a.last_amt_payment,0) as last_amt_payment\r\n from \r\n (select deviceid,\r\n appid,\r\n cid,\r\n first_ins_date,\r\n last_ins_date,\r\n category_id,\r\n isgame,\r\n platform,\r\n first_payment_ds,\r\n last_payment_ds,\r\n num_payment_cnt,\r\n num_payment_day,\r\n amt_payment,\r\n last_amt_payment\r\n from tkdm.tkdm_base_active_payment_info\r\n 
where ds=date_sub(\'$ds\',1) \r\n ) a\r\n\r\n full join\r\n\r\n (select deviceid,\r\n appid,\r\n cid,\r\n ins_date,\r\n category_id,\r\n isgame,\r\n platform\r\n from tkdm.tkdm_data_active_detial_day\r\n where ds=\'$ds\' \r\n ) b on a.deviceid=b.deviceid and a.appid=b.appid \r\n) x\r\n\r\nleft join \r\n\r\n(select deviceid,\r\n appid,\r\n to_date(ins_datetime) as ins_date,\r\n \'$ds\' as payment_ds,\r\n num_payment_cnt,\r\n 1 as num_payment_day,\r\n amt_payment\r\n from tkdm.tkdm_data_payment_detial_day\r\n where ds=\'$ds\' \r\n) y on x.deviceid=y.deviceid and x.appid=y.appid\r\ncluster by deviceid,appid', '2017-03-09 09:34:57', '2017-03-21 01:18:38', 'day', 1);
INSERT INTO `engine_cumulativesetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(18, 'tkdm_base_device_pay_info', 'base', 'set hive.skewjoin.key=10000;\r\nset hive.optimize.skewjoin=true;\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;\r\nset hive.optimize.bucketmapjoin = true;\r\nset hive.optimize.bucketmapjoin.sortedmerge = true;\r\nset hive.merge.mapredfiles=true;\r\nset hive.auto.convert.sortmerge.join=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\ninsert overwrite table tkdm.tkdm_base_device_pay_info partition(ds=\'$ds\')\r\n\r\nselect nvl(b.appid,a.appid) as appid,\r\n nvl(b.deviceid,a.deviceid) as deviceid,\r\n nvl(b.first_payment_ds,\'$ds\') as first_payment_ds,\r\n if(isnull(a.deviceid),\'$ds\',nvl(b.last_payment_ds,\'1970-01-01\')) as last_payment_ds,\r\n nvl(b.num_payment_cnt,0)+nvl(a.num_payment_cnt,0) as num_payment_cnt,\r\n nvl(b.num_payment_day,0)+nvl(a.num_payment_day,0) as num_payment_day,\r\n round(nvl(b.amt_payment,0) + nvl(a.amt_payment,0),2) as amt_payment,\r\n coalesce(a.ins_datetime,b.ins_datetime,\'1970-01-01\') as ins_datetime,\r\n nvl(a.amt_payment,0) as amt_payment_newest\r\nfrom \r\n(select appid,\r\n deviceid,\r\n ins_datetime,\r\n amt_payment,\r\n num_payment_cnt,\r\n 1 as num_payment_day\r\n from tkdm.tkdm_data_payment_detial_day\r\n where ds=\'$ds\' and \r\n deviceid!=\'00000000-0000-0000-0000-000000000000\'\r\n and deviceid!=\'\' and deviceid!=\'0\'\r\n) a \r\nfull join \r\n\r\n(select appid,\r\n deviceid,\r\n first_payment_ds,\r\n last_payment_ds,\r\n num_payment_cnt,\r\n num_payment_day,\r\n amt_payment,\r\n ins_datetime\r\n from tkdm.tkdm_base_device_pay_info\r\n where ds= date_sub(\'$ds\',1)\r\n) b on a.appid=b.appid and a.deviceid=b.deviceid\r\ncluster by deviceid', '2017-03-09 09:34:57', '2017-03-21 01:18:44', 'day', 1);
/*!40000 ALTER TABLE `engine_cumulativesetl` ENABLE KEYS */;
-- Table tkdmchange.engine_customconception: custom-conception ETL job
-- definitions. Same schema as the other engine_* job tables; it held no
-- rows at export time.
CREATE TABLE IF NOT EXISTS `engine_customconception` (
  `id`          int(11)      NOT NULL AUTO_INCREMENT,
  `title`       varchar(256) NOT NULL,
  `description` longtext     NOT NULL,
  `template`    longtext     NOT NULL,
  `added_at`    datetime     NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `modify_at`   datetime     NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `freq`        char(50)     NOT NULL DEFAULT 'day',
  `is_run`      tinyint(4)   NOT NULL DEFAULT '1',
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=20 DEFAULT CHARSET=utf8;
-- 0 rows exported; the unfiltered DELETE below deliberately clears any
-- pre-existing rows on the target so it matches the (empty) source table.
DELETE FROM `engine_customconception`;
/*!40000 ALTER TABLE `engine_customconception` DISABLE KEYS */;
/*!40000 ALTER TABLE `engine_customconception` ENABLE KEYS */;
-- Table tkdmchange.engine_customlogicchain: custom logic-chain ETL job
-- definitions. Same schema as the other engine_* job tables; it held no
-- rows at export time.
CREATE TABLE IF NOT EXISTS `engine_customlogicchain` (
  `id`          int(11)      NOT NULL AUTO_INCREMENT,
  `title`       varchar(256) NOT NULL,
  `description` longtext     NOT NULL,
  `template`    longtext     NOT NULL,
  `added_at`    datetime     NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `modify_at`   datetime     NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `freq`        char(50)     NOT NULL DEFAULT 'day',
  `is_run`      tinyint(4)   NOT NULL DEFAULT '1',
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=15 DEFAULT CHARSET=utf8;
-- 0 rows exported; the unfiltered DELETE below deliberately clears any
-- pre-existing rows on the target so it matches the (empty) source table.
DELETE FROM `engine_customlogicchain`;
/*!40000 ALTER TABLE `engine_customlogicchain` DISABLE KEYS */;
/*!40000 ALTER TABLE `engine_customlogicchain` ENABLE KEYS */;
-- Structure for table tkdmchange.engine_reportetl: report/output-layer ETL
-- jobs that aggregate the base/daily tables into output tables (same schema
-- as engine_baseetl).
CREATE TABLE IF NOT EXISTS `engine_reportetl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`title` varchar(256) NOT NULL,
`description` longtext NOT NULL,
`template` longtext NOT NULL,
`added_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`modify_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`freq` char(50) NOT NULL DEFAULT 'day',
`is_run` tinyint(4) NOT NULL DEFAULT '1',
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=29 DEFAULT CHARSET=utf8;
-- Exporting data for table tkdmchange.engine_reportetl: 7 rows.
-- Intentional unfiltered DELETE: the dump replaces the table's contents.
DELETE FROM `engine_reportetl`;
/*!40000 ALTER TABLE `engine_reportetl` DISABLE KEYS */;
-- Rows 22-28: daily/weekly report templates ($ds and $week_start are
-- substituted by the scheduler at run time).
-- NOTE(review): several stored templates contain Hive `set ...` lines with
-- no trailing semicolon and one `set Mapred...` with a capital M -- this
-- looks like a latent error in the stored job text rather than in this dump;
-- verify against the running scheduler before relying on those settings.
-- NOTE(review): the last INSERT's template literal spans two physical lines
-- below -- the break falls INSIDE the quoted string; do not re-wrap it.
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(22, 'tkdm_output_payment_explore_day', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\ninsert overwrite table tkdm.tkdm_output_payment_explore_day partition(ds=\'$ds\')\r\n\r\nselect \'$ds\' as dt,\r\n x.appid,\r\n count(1) as num_ins,\r\n sum(x.is_new_ins_pay) as num_pay,\r\n sum(if(x.amt_ins_pay_level=0 and is_potential>1,1,0)) as num_potential,\r\n sum(if(x.rmb_level=1,1,0)) as num_low_rmb,\r\n sum(if(x.rmb_level=2,1,0)) as num_medium_rmb,\r\n sum(if(x.rmb_level=3,1,0)) as num_high_rmb,\r\n sum(if(x.amt_ins_pay_level=1,1,0)) as num_level_1,\r\n sum(if(x.amt_ins_pay_level=2,1,0)) as num_level_2,\r\n sum(if(x.amt_ins_pay_level=3,1,0)) as num_level_3\r\nfrom \r\n(select appid,\r\n deviceid,\r\n if(last_ins_date=\'$ds\' ,1,0) as is_new_ins,\r\n if(last_ins_date=\'$ds\' and last_payment_ds=\'$ds\' ,1,0) as is_new_ins_pay,\r\n if( last_ins_date=\'$ds\' and last_payment_ds=\'$ds\' ,\r\n case when last_amt_payment < 6 then 1 \r\n when last_amt_payment < 198 then 2\r\n else 3 end,\r\n 0\r\n ) as amt_ins_pay_level,\r\n case when amt_payment <=0 or amt_payment is null then 0 \r\n when amt_payment <500 then 1\r\n when amt_payment <5000 then 2\r\n else 3 end as rmb_level,\r\n sum(if(amt_payment>0,1,0)) over(partition by deviceid) as is_potential\r\n from tkdm.tkdm_base_active_payment_info\r\n where ds=\'$ds\'\r\n) x \r\nwhere x.is_new_ins=1\r\ngroup by x.appid\r\ncluster by appid\r\n', '2017-03-09 09:34:57', '2017-03-21 01:18:51', 'day', 1);
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(23, 'tkdm_output_user_cluster_app_day', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\n\r\ninsert overwrite table tkdm.tkdm_output_user_cluster_app_day partition(ds=\'$ds\')\r\n\r\nselect \'$ds\' as dt,\r\n appid,\r\n category_id,\r\n isgame,\r\n count(if(cid=-1,1,null)) as num_user_def,\r\n count(if(cid!=-1,1,null)) as num_user_cam\r\nfrom tkdm.tkdm_data_active_detial_day\r\nwhere ds=\'$ds\'\r\ngroup by appid,category_id,isgame\r\ncluster by appid,category_id,isgame', '2017-03-09 09:34:57', '2017-03-21 01:18:56', 'day', 1);
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(24, 'tkdm_output_user_cluster_top_all', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\ninsert overwrite table tkdm.tkdm_output_user_cluster_top_all partition(ds=\'$ds\')\r\n\r\nselect \'$ds\' as dt,\r\n cid,\r\n category_id,\r\n isgame,\r\n num_user,\r\n top_rank\r\nfrom (\r\n select cid,\r\n category_id,\r\n isgame,\r\n num_user,\r\n dense_rank() over(partition by category_id,isgame order by num_user desc ) as top_rank\r\n from (\r\n select cid,\r\n category_id,\r\n isgame,\r\n count(1) as num_user\r\n from tkdm.tkdm_base_active_payment_info\r\n where ds=\'$ds\' and \r\n last_ins_date between add_months(\'$ds\',-2) and \'$ds\'\r\n group by cid,category_id,isgame\r\n cluster by cid,category_id,isgame\r\n ) x \r\n ) y \r\nwhere top_rank <=100', '2017-03-09 09:34:57', '2017-03-21 01:19:02', 'day', 1);
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(25, 'tkdm_output_active_app_std_week', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\nadd jar /mnt1/testdata/udf-0.0.1.jar;\r\nCREATE TEMPORARY FUNCTION bootstrapmedian AS \'com.reyun.hive.udaf.BootStrap\';\r\n\r\ninsert overwrite table tkdm.tkdm_output_active_app_std_week partition(ds=\'$ds\') \r\n\r\nselect /*+ mapjoin(x)*/ x.appid,\r\n \'$week_start\' as week_dt,\r\n x.category_id,\r\n x.isgame,\r\n nvl(y.avg_ins_rate,-1) as avg_ins_rate,\r\n nvl(y.approx_med_ins_rate,-1) as approx_med_ins_rate,\r\n nvl(y.bootstarp_med_ins_rate,-1) as bootstarp_med_ins_rate\r\nfrom \r\n\r\n(select appid,category_id,isgame\r\n from tkio_v2.tkio_dp_app_info\r\n where ds=\'$ds\' \r\n) as x \r\n\r\nleft join \r\n\r\n(select category_id, \r\n isgame,\r\n round(avg(ins_rate), 4) avg_ins_rate,\r\n round(percentile_approx(ins_rate,0.5),4) approx_med_ins_rate,\r\n round(bootstrapmedian(ins_rate),4) bootstarp_med_ins_rate\r\n from (\r\n select appid,\r\n category_id,\r\n isgame ,\r\n ds,\r\n round(sum(num_install) / sum(dupnum_click_all),4) as ins_rate\r\n from tkdm.tkdm_data_app_summary_day\r\n where ds between \'$week_start\' and \'$ds\'\r\n group by appid,category_id,isgame,ds\r\n ) as tt \r\n group by category_id, isgame\r\n) y on x.category_id=y.category_id and x.isgame=y.isgame', '2017-03-09 09:34:57', '2017-03-21 01:19:09', 'week', 1);
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(26, 'tkdm_output_active_cid_std_week', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\nadd jar /mnt1/testdata/udf-0.0.1.jar;\r\nCREATE TEMPORARY FUNCTION bootstrapmedian AS \'com.reyun.hive.udaf.BootStrap\';\r\n\r\ninsert overwrite table tkdm.tkdm_output_active_cid_std_week partition(ds=\'$ds\') \r\nselect /*+ mapjoin(x)*/ x.appid,\r\n \'$week_start\' as week_dt,\r\n y.cid,\r\n x.category_id,\r\n x.isgame,\r\n nvl(y.avg_ins_rate,-1) as avg_ins_rate,\r\n nvl(y.approx_med_ins_rate,-1) as approx_med_ins_rate,\r\n nvl(y.bootstarp_med_ins_rate,-1) as bootstarp_med_ins_rate\r\n\r\nfrom \r\n\r\n(select appid,category_id,isgame\r\n from tkio_v2.tkio_dp_app_info\r\n where ds=\'$ds\' \r\n) as x\r\n\r\nleft join \r\n\r\n(select category_id, \r\n isgame,\r\n cid,\r\n round(avg(ins_rate), 4) avg_ins_rate,\r\n round(percentile_approx(ins_rate,0.5),4) approx_med_ins_rate,\r\n round(bootstrapmedian(ins_rate),4) bootstarp_med_ins_rate\r\n from tkdm.tkdm_data_app_summary_day\r\n where ds between \'$week_start\' and \'$ds\'\r\n group by category_id, isgame,cid\r\n) y on x.category_id=y.category_id and x.isgame=y.isgame', '2017-03-09 09:35:02', '2017-03-21 01:19:14', 'week', 1);
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(27, 'tkdm_output_reten_app_std_week', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\nadd jar /mnt1/testdata/udf-0.0.1.jar;\r\nCREATE TEMPORARY FUNCTION bootstrapmedian AS \'com.reyun.hive.udaf.BootStrap\';\r\n\r\ninsert overwrite table tkdm.tkdm_output_reten_app_std_week partition(ds=\'$ds\') \r\n\r\nselect /*+ mapjoin(x)*/ x.appid,\r\n \'$week_start\' as week_dt,\r\n x.category_id,\r\n x.isgame,\r\n nvl(y.avg_d1_rate,-1) as avg_ins_rate,\r\n nvl(y.approx_med_d1_rate,-1) as approx_med_ins_rate,\r\n nvl(y.bootstarp_med_d1_rate,-1) as bootstarp_med_ins_rate\r\nfrom \r\n\r\n(select appid,category_id,isgame\r\n from tkio_v2.tkio_dp_app_info\r\n where ds=\'$ds\' \r\n) as x\r\n\r\nleft join \r\n(select category_id, \r\n isgame,\r\n round(avg(d1_rate), 4) avg_d1_rate,\r\n round(percentile_approx(d1_rate,0.5),4) approx_med_d1_rate,\r\n round(bootstrapmedian(d1_rate),4) bootstarp_med_d1_rate\r\n from (\r\n select appid,\r\n category_id,\r\n isgame ,\r\n ds,\r\n round(sum(d1_user) / sum(num_user),4) as d1_rate\r\n from tkdm.tkdm_data_retention_summary_day\r\n where ds between \'$week_start\' and \'$ds\'\r\n group by appid,category_id,isgame,ds\r\n ) as tt \r\n group by category_id, isgame\r\n) y on x.category_id=y.category_id and x.isgame=y.isgame', '2017-03-09 09:35:06', '2017-03-21 01:19:22', 'week', 1);
INSERT INTO `engine_reportetl` (`id`, `title`, `description`, `template`, `added_at`, `modify_at`, `freq`, `is_run`) VALUES
(28, 'tkdm_output_app_cumsum_all', 'output', 'set mapred.max.split.size=256000000;\r\nset mapred.min.split.size.per.node=256000000\r\nset Mapred.min.split.size.per.rack=256000000\r\nset hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat\r\nset hive.groupby.skewindata=true;\r\nset hive.exec.compress.output=true;\r\nset mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;\r\n\r\ninsert overwrite table tkdm.tkdm_output_app_cumsum_all partition(ds=\'$ds\') \r\n\r\nselect \'$ds\' as dt,\r\n appid,\r\n sum(cum_num_click) as cum_num_click,\r\n sum(cum_dupnum_click_day) as cum_dupnum_click_day,\r\n sum(cum_dupnum_click_all) as cum_dupnum_click_all,\r\n sum(cum_num_install) as cum_num_install,\r\n sum(cum_dupnum_reged_day) as cum_dupnum_reged_day,\r\n sum(cum_dupnum_reged_mon) as cum_dupnum_reged_mon,\r\n round(sum(cum_amt_income_new_user),2) as cum_amt_income_new_user,\r\n sum(cum_num_payer_new_user) as cum_num_payer_new_user,\r\n round(sum(cum_amt_income_all_user),2) as cum_amt_income_all_user,\r\n sum(cum_num_payer_all_user) as cum_num_payer_all_user\r\n\r\nfrom (\r\nselect appid,\r\n cum_num_click,\r\n cum_dupnum_click_day,\r\n cum_dupnum_click_all,\r\n cum_num_install,\r\n cum_dupnum_reged_day,\r\n cum_dupnum_reged_mon,\r\n cum_amt_income_new_user,\r\n cum_num_payer_new_user,\r\n cum_amt_income_all_user,\r\n cum_num_payer_all_user\r\n from tkdm.tkdm_output_app_cumsum_all\r\n where ds=date_sub(\'$ds\',1)\r\n\r\n\r\nunion all\r\n\r\nselect appid,\r\n sum(num_click) as cum_num_click,\r\n sum(dupnum_click_day) as cum_dupnum_click_day,\r\n sum(dupnum_click_all) as cum_dupnum_click_all,\r\n sum(num_install) as cum_num_install,\r\n sum(dupnum_reged_day) as cum_dupnum_reged_day,\r\n sum(dupnum_reged_mon) as cum_dupnum_reged_mon,\r\n sum(amt_income_new_user) as cum_amt_income_new_user,\r\n sum(num_payer_new_user) as cum_num_payer_new_user,\r\n sum(amt_income_all_user) as cum_amt_income_all_user,\r\n sum(num_payer_all_user) as 
cum_num_payer_all_user\r\n from tkdm.tkdm_data_app_summary_day\r\n where ds=\'$ds\'\r\n group by appid\r\n) as t group by appid\r\n\r\n', '2017-03-10 06:56:40', '2017-03-21 01:19:27', 'day', 1);
/*!40000 ALTER TABLE `engine_reportetl` ENABLE KEYS */;
-- Table tkdmchange.engine_sharding: a small counter table for the engine's
-- sharding configuration (one row exported below).
CREATE TABLE IF NOT EXISTS `engine_sharding` (
  `id`        int(11)  NOT NULL AUTO_INCREMENT,
  `num`       int(11)  NOT NULL,
  `added_at`  datetime NOT NULL,
  `modify_at` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=3 DEFAULT CHARSET=utf8;
-- 1 row exported; the unfiltered DELETE deliberately clears the target
-- table before the row below is reloaded.
DELETE FROM `engine_sharding`;
/*!40000 ALTER TABLE `engine_sharding` DISABLE KEYS */;
INSERT INTO `engine_sharding` (`id`, `num`, `added_at`, `modify_at`) VALUES
(1, 1, '2017-03-08 18:04:32', '2017-03-08 18:04:32');
/*!40000 ALTER TABLE `engine_sharding` ENABLE KEYS */;
-- Restore the client session settings saved at the top of the dump. The
-- IFNULL/IF guards keep this safe if the earlier versioned comments were
-- skipped (older server) and the @OLD_* user variables are still NULL.
/*!40101 SET SQL_MODE=IFNULL(@OLD_SQL_MODE, '') */;
/*!40014 SET FOREIGN_KEY_CHECKS=IF(@OLD_FOREIGN_KEY_CHECKS IS NULL, 1, @OLD_FOREIGN_KEY_CHECKS) */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment