#' Bootstrap estimate of the median.
#'
#' Draws `n_boot` resamples of `x` (same size as `x`, with replacement),
#' takes the median of each resample and returns the mean of those medians.
#'
#' @param x Values to resample; coerced with `as.vector()`.
#' @param n_boot Number of bootstrap resamples (default 1000, the value the
#'   script has always used).
#' @return A single numeric value; `NA_real_` when `x` is empty. Because
#'   `median()` is called without `na.rm`, any resample containing `NA`
#'   contributes `NA`, which makes the overall result `NA`.
BootStarpMedian <- function(x, n_boot = 1000L) {
  x <- as.vector(x)
  n <- length(x)
  if (n == 0L) {
    return(NA_real_)
  }
  # Resample by index rather than calling sample(x, ...): for a single numeric
  # value >= 1, sample(x) draws from 1:x instead of returning x itself, which
  # would give a wrong estimate for groups that contain only one row.
  boot_medians <- vapply(
    seq_len(n_boot),
    function(i) median(x[sample.int(n, size = n, replace = TRUE)]),
    numeric(1)
  )
  mean(boot_medians)
}

# ----------------------
# app std
# ----------------------

# Install rate per (appid, category_id, isgame, ds) for the fixed date window.
# NOTE(review): ins_rate is presumably NULL/NA whenever sum(dupnum_click_all)
# is 0, and one NA makes the whole group's estimate NA -- confirm that this is
# the intended handling.
app_query_sql <- paste(
  "select appid, category_id, isgame , ds,",
  "sum(num_install) / sum(dupnum_click_all) as ins_rate",
  "from tkdm.tkdm_data_active_detial_day",
  "where ds between '2017-02-20' and '2017-02-26'",
  "group by appid,category_id,isgame,ds "
)

# Result of the app-level query.
app_query_result <- sql(app_query_sql)

# Output schema of the grouped aggregation. The key columns get an `_app`
# suffix so they do not clash with the query columns in the join below.
# NOTE(review): `_std` presumably means "standard"/baseline rather than
# standard deviation; the value is the bootstrap median estimate -- confirm.
app_schema <- structType(
  structField("category_id_app", "integer"),
  structField("isgame_app", "integer"),
  structField("ins_rate_std", "double")
)

# One bootstrap median estimate per (category_id, isgame) group.
app <- gapply(
  x = app_query_result,
  cols = c("category_id", "isgame"),
  function(key, x) {
    data.frame(key, BootStarpMedian(x$ins_rate), stringsAsFactors = FALSE)
  },
  schema = app_schema
)
# collect(app)  # uncomment to inspect the aggregated result locally

# Attach the group-level estimate to every app/day row.
app_output <- join(
  x = app_query_result,
  y = app,
  joinExpr = app_query_result$category_id == app$category_id_app &
    app_query_result$isgame == app$isgame_app,
  joinType = "inner"
)
app_output <- select(
  app_output,
  "appid", "category_id", "isgame", "ins_rate_std", "ds"
)

# ----------------------
# cid std
# ----------------------

# Row-level data for the same date window. `ds` is selected here because the
# final projection of cid_output asks for it; without it that select cannot
# resolve the column.
# NOTE(review): this query reads an `ins_rate` column straight from the table,
# whereas the app query derives it from num_install / dupnum_click_all --
# confirm that the table really has this column.
cid_query_sql <- paste(
  "select appid, cid, category_id, isgame , ds, ins_rate",
  "from tkdm.tkdm_data_active_detial_day",
  "where ds between '2017-02-20' and '2017-02-26' "
)

# Result of the cid-level query.
aa <- sql(cid_query_sql)

# Output schema of the grouped aggregation; key columns get a `_cid` suffix
# so they do not clash with the query columns in the join below.
cid_schema <- structType(
  structField("category_id_cid", "integer"),
  structField("isgame_cid", "integer"),
  structField("cid_cid", "integer"),
  structField("ins_rate_std", "double")
)

# One bootstrap median estimate per (category_id, isgame, cid) group.
cid <- gapply(
  x = aa,
  cols = c("category_id", "isgame", "cid"),
  function(key, x) {
    data.frame(key, BootStarpMedian(x$ins_rate), stringsAsFactors = FALSE)
  },
  schema = cid_schema
)
# collect(cid)  # uncomment to inspect the aggregated result locally

# Attach the group-level estimate to every source row.
cid_output <- join(
  x = aa,
  y = cid,
  joinExpr = aa$category_id == cid$category_id_cid &
    aa$isgame == cid$isgame_cid &
    aa$cid == cid$cid_cid,
  joinType = "inner"
)
cid_output <- select(
  cid_output,
  "appid", "cid", "category_id", "isgame", "ins_rate_std", "ds"
)