1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
package mobvista.dmp.datasource.device
import java.text.SimpleDateFormat
import java.util
import java.util.regex.Pattern
import com.google.common.collect.Sets
import org.apache.spark.sql.types.{StringType, StructField, StructType}
/**
* @package: mobvista.dmp.datasource.device
* @author: wangjf
* @create: 2018-08-28 16:50
**/
object Constant {
val iosPkgPtn = Pattern.compile("^\\d+$")
val adrPkgPtn = Pattern.compile("^[0-9a-zA-Z\\.]+$")
var lineSplit: Pattern = Pattern.compile("-")
val wellSplit: Pattern = Pattern.compile("#")
val colonSplit: Pattern = Pattern.compile(":")
val verticalLine: Pattern = Pattern.compile("\\|")
val dollarSplit: Pattern = Pattern.compile("\\$")
val `match`: Pattern = Pattern.compile("^0*-0*-0*-0*-0*$")
val regex: Pattern = Pattern.compile("""^\d+$""")
val matchingSet: util.HashSet[String] = Sets.newHashSet("", "0", "1970", "GB", "null", "-")
val format = new SimpleDateFormat("yyyy-MM-dd")
val sdf = new SimpleDateFormat("yyyyMMdd")
val didPtn = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$"
val imeiPtn = "^([0-9]{15,17})$"
val dm_device_tag_sql =
s"""
|SELECT device_id,device_type,tag_type,first_tag,second_tag
|FROM dwh.dm_device_tag WHERE dt = '@dt' AND business = '@business' AND update_date = '@update_date'
""".stripMargin
val dm_install_list_sql: String =
s"""
|SELECT device_id,device_type,platform,install_list
|FROM dwh.dm_install_list WHERE CONCAT(year,month,day) = '@date' AND business = '@business'
""".stripMargin
val dm_install_list_v2_sql: String =
s"""
|SELECT device_id,device_type,platform,CONCAT_WS(',',COLLECT_SET(CONCAT(package_name,'#','update_date'))) install_list
| FROM dwh.dm_install_list_v2 WHERE dt = '@date' AND business = '@business' GROUP BY device_id,device_type,platform
| HAVING MAX(update_date) = '@update_date'
""".stripMargin
val dm_install_daily_schema = StructType(Array(
StructField("device_id", StringType),
StructField("device_type", StringType),
StructField("platform", StringType),
StructField("package_name", StringType),
StructField("update_date", StringType)))
val dm_device_gender_sql =
"""
|SELECT t.device_id, t.device_type, t.gender
|FROM (
| SELECT device_id, device_type, gender,
| row_number() OVER(PARTITION BY device_id, device_type, gender ORDER BY ratio DESC)AS rk
| FROM dwh.dm_device_gender_v2
| WHERE year = '@year' AND month = '@month' AND day = '@day' AND update_date = '@update_date'
|) t
|WHERE t.rk = '1'
""".stripMargin
val dm_device_age_sql =
s"""
|select device_id,device_type,split(getAgeRatio(age),'#')[0] as age,split(getAgeRatio(age),'#')[1] as ratio
|from dwh.dm_device_age_v2
|where year = '@year' and month = '@month'
|and day = '@day' and update_date = '@update_date'
""".stripMargin
val ageRatio =
"""
|select t.device_id, t.device_type, t.age
|from (
| select device_id, device_type, age,
| row_number() over(partition by device_id, device_type, age order by ratio desc)as rk
| from tmp_age
|) t
|where t.rk = '1'
""".stripMargin
val user_info_sql =
s"""select t.device_id, t.device_type, t.platform,
| case when upper(t.country) = 'GB' then 'UK' else t.country end as country,
| t.age, t.gender, t.tags, t.install_list, t.first_req_day, t.last_req_day
|from (
| select
| coalesce(a.device_id, b.device_id) as device_id,
| coalesce(a.device_type, b.device_type) as device_type,
| coalesce(a.platform, b.platform) as platform,
| coalesce(a.country, b.country) as country,
| coalesce(a.age, b.age, '') as age,
| coalesce(a.gender, b.gender, '') as gender,
| '' as tags,
| coalesce(a.install_list,b.install_list, '') as install_list,
| case when
| b.device_id is null
| then
| '@date'
| else
| b.first_req_day
| end as first_req_day,
| case when
| a.device_id is null
| then
| b.last_req_day
| else
| '@date'
| end as last_req_day
| from (
| select /*+ mapjoin(t)*/ t.device_id, t.device_type, t.platform, t.country, a.age, g.gender, p.install_list
| from (
| select t.device_id, t.device_type, t.platform, t.country,
| row_number() over(partition by t.device_id, t.device_type order by t.country desc ) as rk
| from t_daily t
| where t.device_id rlike '${didPtn}' or t.device_id rlike '${imeiPtn}'
| ) t
| left outer join t_age a on (upper(a.device_id) = upper(t.device_id) and a.device_type = t.device_type)
| left outer join t_gender g on (upper(g.device_id) = upper(t.device_id) and g.device_type = t.device_type)
| left outer join t_package p on (upper(p.device_id) = upper(t.device_id) and p.device_type = t.device_type)
| where t.rk = 1
| ) a
| full outer join t_total b
| on a.device_id = b.device_id and a.device_type = b.device_type
|) t
""".stripMargin
val ods_dmp_user_info_sql =
s"""
|SELECT device_id, device_type, platform, country, '' age, '' gender , '' tags,
| first_req_day, last_req_day
| FROM dwh.ods_dmp_user_info WHERE dt = '@dt' AND business = '@business'
""".stripMargin
val alter_partition =
"""
|USE dwh;
|ALTER TABLE @table ADD IF NOT EXISTS PARTITION (@part) LOCATION '@location';
""".stripMargin
val old2new_sql: String =
"""
|SELECT UPPER(CONCAT(tag_type, '-', first_tag, '-', second_tag)) tag_code, new_second_id FROM
| dwh.dm_old2new_tag
""".stripMargin
}