package mobvista.dmp.datasource.age_gender import org.apache.spark.sql.types.{StringType, StructField, StructType} /** * @package: mobvista.dmp.datasource.age * @author: wangjf * @create: 2018-09-06 10:53 **/ object Constant { def schema: StructType = { StructType(StructField("device_id", StringType) :: StructField("tag", StringType) :: StructField("device_type", StringType) :: StructField("package_names", StringType) :: StructField("new_date", StringType) :: Nil) } def schema_bigmedia_domestic: StructType = { StructType(StructField("device_id", StringType) :: StructField("device_type", StringType) :: StructField("platform", StringType) :: StructField("network", StringType) :: StructField("uuid", StringType) :: StructField("event_name", StringType) :: StructField("event_value", StringType) :: StructField("timestamp_date", StringType) :: StructField("package_name", StringType) :: StructField("genders", StringType) :: StructField("age_min", StringType) :: StructField("age_max", StringType) :: Nil) } def schema_age_gender: StructType = { StructType(StructField("device_id", StringType) :: StructField("tag", StringType) :: StructField("label", StringType) :: StructField("business", StringType) :: StructField("device_type", StringType) :: Nil) } def schema_gender_lr: StructType = { StructType(StructField("device_id", StringType) :: StructField("device_type", StringType) :: StructField("label", StringType) :: StructField("package_names", StringType) :: StructField("label_type", StringType) :: Nil) } def merge_schema: StructType = { StructType(StructField("device_id", StringType) :: StructField("package_names", StringType) :: StructField("label", StringType) :: StructField("device_type", StringType) :: StructField("update_date", StringType) :: Nil) } def gender_schema: StructType = { StructType(StructField("device_id", StringType) :: StructField("device_type", StringType) :: StructField("gender", StringType) :: StructField("gender_type", StringType) :: Nil) } def age_schema: StructType = { StructType(StructField("device_id", StringType) :: StructField("label", StringType) :: Nil) } val install_list_sql = s""" |SELECT device_id, device_type, install_list package_name | FROM dwh.dm_install_list WHERE concat(year, month, day) = '@date' | AND business IN ('3s','adn_request_sdk','dsp_req') | UNION ALL | SELECT device_id, device_type, install_list package_name | FROM dwh.dm_install_list WHERE concat(year, month, day) = '@yes_date' | AND business = 'ga' """.stripMargin val dmp_install_list_join_age_sql: String = """ |SELECT t.device_id device_id, package_names, COALESCE(label,'null') label, device_type, update_date | FROM t_install t | LEFT OUTER JOIN | (SELECT device_id, CONCAT_WS('$',COLLECT_SET(CONCAT(label,'#',business))) label | FROM t_age | GROUP BY device_id | ) a | ON LOWER(t.device_id) = LOWER(a.device_id) |""".stripMargin val dmp_install_list_join_gender_sql: String = """ |SELECT t.device_id device_id, package_names, COALESCE(label,'null') label, device_type, update_date | FROM t_install t | LEFT OUTER JOIN | (SELECT device_id, CONCAT_WS('$',COLLECT_SET(CONCAT(label,'#',business))) label | FROM t_gender | GROUP BY device_id | ) a | ON t.device_id = a.device_id |""".stripMargin val dmp_install_list_sql: String = """ |SELECT device_id device_id, split_keys(CONCAT_WS('#',COLLECT_SET(install_list))) package_names, MAX(device_type) device_type, MAX(update_date) update_date | FROM | (SELECT device_id, device_type, platform, pkg_keys(install_list) install_list, update_date | FROM dwh.dmp_install_list | WHERE dt = '@date' AND business NOT IN ('ga','other','14days','day') AND update_date > '@update_date' | UNION | SELECT device_id, device_type, platform, pkg_keys(install_list) install_list, update_date | FROM dwh.dmp_install_list | WHERE dt = '@ga_date' AND business IN ('ga','other') AND update_date > '@update_date' | ) all |GROUP BY device_id |""".stripMargin val dmp_install_list_sql_14days: String = """ |SELECT device_id, device_type, platform, pkg_keys(install_list) package_names, update_date | FROM dwh.dmp_install_list | WHERE dt = '@date' AND business = '14days' |""".stripMargin val install_list_v2_sql = s""" |SELECT device_id, 'B' tag, max(device_type) device_type, concat_ws('#',collect_list(package_name)) package_names, | max(update_date) new_date | FROM | (SELECT device_id, device_type, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@date' AND business IN ('3s','adn_request_sdk','adn_install','dsp_req') AND @check_deviceId | UNION ALL | SELECT device_id, device_type, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@ga_date' AND business = 'ga' AND @check_deviceId | UNION ALL | SELECT device_id, device_type, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@other_date' AND business = 'other' AND @check_deviceId | ) all | GROUP BY device_id """.stripMargin val ods_gender_sql = s""" |select |coalesce(fb.device_id,tp.device_id,ga.device_id,bm.device_id,dsp.device_id) device_id, |coalesce(fb.label,tp.label,ga.label,bm.label,dsp.label) label, |coalesce(fb.device_type,tp.device_type,ga.device_type,bm.device_type,dsp.device_type) device_type |from (select * from dwh.ods_gender_fb where dt = '@date' ) fb full join (select * from dwh.ods_gender_tp where dt = '@date' ) tp on (fb.device_id = tp.device_id and fb.device_type = tp.device_type) |full join (select * from dwh.ods_gender_ga where dt = '@date' ) ga on (coalesce(fb.device_id,tp.device_id) = ga.device_id and coalesce(fb.device_type,tp.device_type) = ga.device_type) |full join (select * from dwh.ods_gender_bm where dt = '@dt_yesterday' ) bm on (coalesce(fb.device_id,tp.device_id,ga.device_id) = bm.device_id and coalesce(fb.device_type,tp.device_type,ga.device_type) = bm.device_type) |full join (select * from dwh.ods_gender_dsp where dt = '@date' ) dsp on (coalesce(fb.device_id,tp.device_id,ga.device_id,bm.device_id) = dsp.device_id and coalesce(fb.device_type,tp.device_type,ga.device_type,bm.device_type) = dsp.device_type) |""".stripMargin val dmp_device_gender = s""" |select X.device_id,X.device_type,min(gender) gender,min(gender_type) gender_type from ( |select device_id,device_type,case when label_type = 'none' then 'n' else label end as gender,label_type gender_type from dwh.ods_device_gender where dt = '@date' and label_type !='calc' |union all |select device_id,device_type,gender,gender_type from ods_gender_prediction_tab ) X |group by X.device_id,X.device_type |""".stripMargin val install_list_v2_gender_lr_sql = s""" |SELECT device_id, device_type, concat_ws('#',collect_set(package_name)) package_names | FROM | (SELECT device_id, device_type, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@date' AND business IN ('3s','adn_request_sdk','adn_install','dsp_req') | UNION ALL | SELECT device_id, device_type, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@ga_date' AND business = 'ga' | UNION ALL | SELECT device_id, device_type, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@other_date' AND business = 'other' | ) all | GROUP BY device_id, device_type """.stripMargin val dmp_install_list_gender_lr_sql: String = """ |SELECT device_id, device_type device_type, concat_ws('#',collect_list(package_names)) package_names, | max(update_date) new_date | FROM | (SELECT device_id, device_type, pkg_keys(install_list) package_names, update_date | FROM dwh.dmp_install_list | WHERE dt = '@date' AND business IN ('3s','adn_request_sdk','adn_install','dsp_req') | UNION ALL | SELECT device_id, device_type, pkg_keys(install_list) package_names, update_date | FROM dwh.dmp_install_list | WHERE dt = '@ga_date' AND business IN ('ga','other') | ) all | GROUP BY device_id, device_type |""".stripMargin val install_list_v2_gender_sql = s""" |SELECT device_id, device_type, business, concat_ws('#',collect_set(package_name)) package_names, | max(update_date) new_date | FROM | (SELECT device_id, device_type, business, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@date' AND business IN ('3s','adn_request_sdk','adn_install','dsp_req') AND @check_deviceId | UNION ALL | SELECT device_id, device_type, business, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@ga_date' AND business = 'ga' AND @check_deviceId | UNION ALL | SELECT device_id, device_type, business, package_name, update_date | FROM dwh.dm_install_list_v2 | WHERE dt = '@other_date' AND business = 'other' AND @check_deviceId | ) all | GROUP BY device_id, device_type, business """.stripMargin val dsp_sql = s""" |SELECT device_id, device_type, birthday, gender, 'dsp' as tag | FROM dwh.dm_profile_total WHERE concat(year, month, day) = '@date' AND type = '@type' AND @check_deviceId AND (@check_birthday OR @check_gender) """.stripMargin val dm_profile_sql_lr = s""" |SELECT device_id, device_type, min(birthday) birthday, min(gender) gender, 'dsp' as tag | FROM dwh.dm_profile_total WHERE concat(year, month, day) = '@date' AND type = '@type' AND @check_deviceId AND (@check_birthday OR @check_gender) | group by device_id, device_type """.stripMargin val dsp_profile_sql_lr = s""" |SELECT device_id, device_type, max(birthday) birthday, min(gender) gender, 'dsp' as tag | FROM dwh.dsp_profile_total WHERE dt = '@date' AND @check_deviceId AND (@check_birthday OR @check_gender) | GROUP BY device_id, device_type """.stripMargin val ga_sql = s""" |SELECT device_id, device_type, birth_year birthday, gender, 'ga' as tag | FROM dwh.ods_ga_device_total WHERE concat(year, month, day) = '@date' AND @check_deviceId AND (@check_birthday OR @check_gender) """.stripMargin val ga_sql_lr = s""" |SELECT device_id, device_type, min(birth_year) birthday, min(gender) gender, 'ga' as tag | FROM dwh.ods_ga_device_total WHERE concat(year, month, day) = '@date' AND @check_deviceId AND (@check_birthday OR @check_gender) | group by device_id, device_type """.stripMargin val fb_sql = s"""select device_id,device_type,platform,package_names,country,gender from etl_facebook_total WHERE @check_deviceId """.stripMargin val tp_sql = s""" | SELECT device_id,device_type,platform,case when gender = 'male' then 'm' when gender = 'female' then 'f' else gender end as gender | from dwh.etl_gender_thirdparty_data_total where dt ='@date' AND @check_deviceId """.stripMargin }