Commit 6b3ba3b6 by WangJinfeng

fix tracking etl job

parent 361e9bab
...@@ -51,7 +51,7 @@ public class GetDevIdUtil { ...@@ -51,7 +51,7 @@ public class GetDevIdUtil {
if (StringUtils.isNotBlank(devId)) { if (StringUtils.isNotBlank(devId)) {
return MRUtils.JOINER.join(devId, deviceType); return MRUtils.JOINER.join(devId, deviceType);
} else { } else {
return null; return "";
} }
} }
......
...@@ -14,15 +14,31 @@ abstract class CommonSparkJob { ...@@ -14,15 +14,31 @@ abstract class CommonSparkJob {
val ENCODING = "UTF-8" val ENCODING = "UTF-8"
val HTTPPREFIX = "http://test.com" val HTTPPREFIX = "http://test.com"
val DATA_SPLIT = "\t" val DATA_SPLIT = "\t"
val didPtn = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$"
val imeiPtn = "^([0-9]{15,17})$"
val imeiMd5Ptn = "^([a-fA-F0-9]{32})$" val imeiMd5Ptn = "^([a-fA-F0-9]{32})$"
val andriodIdPtn = "^[a-zA-Z0-9]{16}$"
val oaidAnotherPtn = "^([a-fA-F0-9]{1,64})$" val oaidAnotherPtn = "^([a-fA-F0-9]{1,64})$"
val md5Ptn = """^([0-9a-zA-Z])\1{30,32}""" // IDFA/GAID
val didPtn = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$"
// All-zero identifier
val allZero = "00000000-0000-0000-0000-000000000000" val allZero = "00000000-0000-0000-0000-000000000000"
// IMEI
val imeiPtn = "^([0-9]{14,17})$"
// A single digit repeated for the whole value (the first digit plus 14-16 repeats); used to filter out invalid IMEIs
val imeiPtnAll = """^([0-9])\1{14,16}"""
// androidId
val andriodIdPtn = "^[a-zA-Z0-9]{15,17}$"
// Android ID made of one character repeated for the whole value (the first
// character plus 15 repeats, 16 characters in total); used to filter out
// invalid Android IDs.
// Must be a raw (triple-quoted) string with a capture group: in an ordinary
// string literal "\1" is not passed to the regex engine as a back-reference,
// and without a group there is nothing for \1 to refer to.
val andriodIdAll = """^([a-zA-Z0-9])\1{15}$"""
// MD5
val md5Ptn = "^([a-fA-F0-9]{32})$"
// A single character repeated for the whole value; used to filter out invalid IMEI MD5 values
val umd5Ptn = """^([0-9A-Za-z])\1{29,31}"""
// OAID
val oaidPtb = """^[0-9A-Za-z-]{16,64}$"""
// IP
val ipPtn = """^(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])(\.(25[0-5]|2[0-4][0-9]|[0-1]?[0-9]?[0-9])){3}$"""
// Date
val datePtn = """^\d{4}-\d{2}-\d{2}"""
val options = buildOptions() val options = buildOptions()
val commParser = new BasicParser val commParser = new BasicParser
......
...@@ -80,7 +80,7 @@ object AdnConstant { ...@@ -80,7 +80,7 @@ object AdnConstant {
} else { } else {
"" ""
} }
if (StringUtils.isBlank(ruid) || ruid.length < 16) { if (StringUtils.isBlank(ruid) || ruid.length < 16 || ruid.length > 64) {
ruid = "" ruid = ""
} }
ruid ruid
......
...@@ -3,7 +3,7 @@ package mobvista.dmp.datasource.adn ...@@ -3,7 +3,7 @@ package mobvista.dmp.datasource.adn
import mobvista.dmp.common.{CommonSparkJob, MobvistaConstant} import mobvista.dmp.common.{CommonSparkJob, MobvistaConstant}
import mobvista.dmp.util.MRUtils import mobvista.dmp.util.MRUtils
import org.apache.commons.cli.{BasicParser, Options} import org.apache.commons.cli.{BasicParser, Options}
import org.apache.commons.lang.StringUtils import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.io.compress.GzipCodec
...@@ -85,8 +85,68 @@ class AdnOrgLogEtlHours extends CommonSparkJob with Serializable { ...@@ -85,8 +85,68 @@ class AdnOrgLogEtlHours extends CommonSparkJob with Serializable {
val oaid = r.getAs[String]("oaid") val oaid = r.getAs[String]("oaid")
val ruid = r.getAs[String]("ruid") val ruid = r.getAs[String]("ruid")
if ((StringUtils.isNotBlank(idfa) && idfa.matches(MobvistaConstant.didPtn)) || val f_idfa = if (StringUtils.isNotBlank(idfa) && (idfa.matches(didPtn) && !idfa.matches(allZero) || idfa.matches(md5Ptn))) {
(StringUtils.isNotBlank(idfv) && idfv.matches(MobvistaConstant.didPtn)) || (StringUtils.isNotBlank(oaid) && oaid.matches(MobvistaConstant.oaidPtb)) || idfa
} else {
""
}
val f_idfv = if (StringUtils.isNotBlank(idfv) && (idfv.matches(didPtn) && !idfv.matches(allZero) || idfv.matches(md5Ptn))) {
idfv
} else {
""
}
val f_imei = if (StringUtils.isNotBlank(imei) && (imei.matches(imeiPtn) && !imei.matches(imeiPtnAll) || imei.matches(md5Ptn))) {
imei
} else {
""
}
val f_androidId = if (StringUtils.isNotBlank(androidId) && (androidId.matches(andriodIdPtn) && !androidId.matches(andriodIdAll) || androidId.matches(md5Ptn))) {
androidId
} else {
""
}
val f_oaid = if (StringUtils.isNotBlank(oaid) && oaid.length >= 16 && oaid.length <= 64) {
oaid
} else {
""
}
val f_gaid = if (StringUtils.isNotBlank(gaid) && (gaid.matches(didPtn) && !gaid.matches(allZero) || gaid.matches(md5Ptn))) {
gaid
} else {
""
}
val f_sysId = if (StringUtils.isNotBlank(sysId) && (sysId.matches(didPtn) && !sysId.matches(allZero) || sysId.matches(md5Ptn))) {
sysId
} else {
""
}
var f_platform = if (StringUtils.isNotBlank(platform)) {
platform.toLowerCase()
} else {
""
}
f_platform = if (f_platform.contains("ios") || f_platform.contains("iphone") || deviceBrand.toLowerCase.contains("apple")
|| deviceModel.toLowerCase.contains("iphone") || deviceModel.toLowerCase.contains("ipad") || osVersion.toLowerCase.contains("ios")
|| StringUtils.isNotBlank(f_idfa) || StringUtils.isNotBlank(f_idfv)) {
"ios"
} else if (f_platform.contains("android") || osVersion.toLowerCase.contains("android") ||
StringUtils.isNotBlank(f_imei) || StringUtils.isNotBlank(f_androidId) || StringUtils.isNotBlank(f_oaid) || StringUtils.isNotBlank(f_gaid)) {
"android"
} else {
"other"
}
if ((StringUtils.isNotBlank(f_idfa) || StringUtils.isNotBlank(f_idfv) || StringUtils.isNotBlank(f_imei) || StringUtils.isNotBlank(f_androidId) ||
StringUtils.isNotBlank(f_oaid) || StringUtils.isNotBlank(f_gaid) || StringUtils.isNotBlank(f_sysId) || StringUtils.isNotBlank(ruid)) &&
!"other".equals(f_platform)) {
MRUtils.JOINER.join(date, time, timestamp, appId, f_platform, osVersion, sdkVersion, deviceModel, screenSize, countryCode,
language, ip, f_imei, mac, f_androidId, f_gaid, f_idfa, deviceBrand, f_sysId, packageName, strategy, f_oaid, f_idfv, ruid)
} else {
null
}
/*
if ((StringUtils.isNotBlank(idfa) && idfa.matches(MobvistaConstant.didPtn)) || (StringUtils.isNotBlank(idfv) && idfv.matches(MobvistaConstant.didPtn)) ||
(StringUtils.isNotBlank(sysId) && sysId.matches(MobvistaConstant.didPtn)) || (StringUtils.isNotBlank(ruid) && ruid.length > 16)) { (StringUtils.isNotBlank(sysId) && sysId.matches(MobvistaConstant.didPtn)) || (StringUtils.isNotBlank(ruid) && ruid.length > 16)) {
val plt = if (StringUtils.isNotBlank(platform)) { val plt = if (StringUtils.isNotBlank(platform)) {
platform platform
...@@ -108,6 +168,7 @@ class AdnOrgLogEtlHours extends CommonSparkJob with Serializable { ...@@ -108,6 +168,7 @@ class AdnOrgLogEtlHours extends CommonSparkJob with Serializable {
} else { } else {
null null
} }
*/
}).filter(l => { }).filter(l => {
StringUtils.isNotBlank(l) StringUtils.isNotBlank(l)
}) })
......
...@@ -112,55 +112,55 @@ class AdnRequestSdkEtlDaily extends CommonSparkJob with java.io.Serializable { ...@@ -112,55 +112,55 @@ class AdnRequestSdkEtlDaily extends CommonSparkJob with java.io.Serializable {
if (StringUtils.isNotBlank(ruid) && ruid.length > 16) { if (StringUtils.isNotBlank(ruid) && ruid.length > 16) {
linesArr += Row(ruid, "ruid", platform, appId, model, brand, osVersion, country, strategy, region, 1) linesArr += Row(ruid, "ruid", platform, appId, model, brand, osVersion, country, strategy, region, 1)
} }
if (StringUtils.isNotBlank(idfa) && idfa.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !idfa.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(idfa) && (idfa.matches(didPtn) && !idfa.matches(allZero) || idfa.matches(md5Ptn))) {
linesArr += Row(idfa, "idfa", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(idfa, "idfa", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
if (StringUtils.isNotBlank(sysId)) { if (StringUtils.isNotBlank(sysId)) {
linesArr += Row(sysId, "sysid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(sysId, "sysid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
dev_tag = 0 dev_tag = 0
if (StringUtils.isNotBlank(idfv) && idfv.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !idfv.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(idfv) && (idfv.matches(didPtn) && !idfv.matches(allZero) || idfv.matches(md5Ptn))) {
linesArr += Row(idfv, "idfv", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(idfv, "idfv", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
} else { } else {
if (StringUtils.isNotBlank(sysId)) { if (StringUtils.isNotBlank(sysId)) {
linesArr += Row(sysId, "sysid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(sysId, "sysid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
if (StringUtils.isNotBlank(idfv) && idfv.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !idfv.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(idfv) && (idfv.matches(didPtn) && !idfv.matches(allZero) || idfv.matches(md5Ptn))) {
linesArr += Row(idfv, "idfv", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(idfv, "idfv", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
} else { } else {
if (StringUtils.isNotBlank(idfv) && idfv.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !idfv.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(idfv) && (idfv.matches(didPtn) && !idfv.matches(allZero) || idfv.matches(md5Ptn))) {
linesArr += Row(idfv, "idfv", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(idfv, "idfv", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
} }
} }
case "android" => case "android" =>
var dev_tag = 1 var dev_tag = 1
if (StringUtils.isNotBlank(gaid) && gaid.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !gaid.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(gaid) && (gaid.matches(didPtn) && !gaid.matches(allZero) || gaid.matches(md5Ptn))) {
linesArr += Row(gaid, "gaid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(gaid, "gaid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
if (StringUtils.isNotBlank(oaid) && oaid.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !oaid.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(oaid) && (oaid.matches(didPtn) && !oaid.matches(allZero) || oaid.matches(md5Ptn))) {
linesArr += Row(oaid, "oaid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(oaid, "oaid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
dev_tag = 0 dev_tag = 0
if (StringUtils.isNotBlank(sysId)) { if (StringUtils.isNotBlank(sysId)) {
linesArr += Row(sysId, "sysid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(sysId, "sysid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
if (StringUtils.isNotBlank(imei) && imei.matches(mobvista.dmp.common.MobvistaConstant.imeiPtn)) { if (StringUtils.isNotBlank(imei) && (imei.matches(imeiPtn) || imei.matches(md5Ptn))) {
linesArr += Row(imei, "imei", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(imei, "imei", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
if (StringUtils.isNotBlank(androidId) && androidId.matches(mobvista.dmp.common.MobvistaConstant.andriodIdPtn)) { if (StringUtils.isNotBlank(androidId) && (androidId.matches(andriodIdPtn) || androidId.matches(md5Ptn))) {
linesArr += Row(androidId, "androidId", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(androidId, "androidId", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
} else { } else {
if (StringUtils.isNotBlank(oaid) && oaid.matches(mobvista.dmp.common.MobvistaConstant.didPtn) && !oaid.matches(mobvista.dmp.common.MobvistaConstant.allZero)) { if (StringUtils.isNotBlank(oaid) && (oaid.matches(didPtn) && !oaid.matches(allZero) || oaid.matches(md5Ptn))) {
linesArr += Row(oaid, "oaid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(oaid, "oaid", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
if (StringUtils.isNotBlank(imei) && imei.matches(mobvista.dmp.common.MobvistaConstant.imeiPtn) && "android".equals(platform)) { if (StringUtils.isNotBlank(imei) && (imei.matches(imeiPtn) || imei.matches(md5Ptn))) {
if (dev_tag == 1) { if (dev_tag == 1) {
dev_tag = 0 dev_tag = 0
} }
linesArr += Row(imei, "imei", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag) linesArr += Row(imei, "imei", platform, appId, model, brand, osVersion, country, strategy, region, dev_tag)
} }
if (StringUtils.isNotBlank(androidId) && androidId.matches(mobvista.dmp.common.MobvistaConstant.andriodIdPtn) && "android".equals(platform)) { if (StringUtils.isNotBlank(androidId) && (androidId.matches(andriodIdPtn) || androidId.matches(md5Ptn))) {
if (dev_tag == 1) { if (dev_tag == 1) {
dev_tag = 0 dev_tag = 0
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment