Commit e4336377 by fan.jiang

fix bug rtdmp_normal

parent 68629eef
@@ -121,15 +121,24 @@ class RtdmpNormal extends CommonSparkJob with Serializable {
       val package_name: String = array(index)._4
       val country_code: String = array(index)._5
       println(inputPath)
-      inputDataRdd = inputDataRdd.union(spark.sparkContext.textFile(inputPath).map(row => {
-        if (row.length == 32) {
-          DmpDailyDataInformation(row, device_type_md5, platform, package_name, country_code)
-        }
-        else {
-          DmpDailyDataInformation(row, device_type_not_md5, platform, package_name, country_code)
-        }
-      }
-      ))
+      val pathUri = new URI(inputPath)
+      // Filter out non-existent s3 paths such as s3://mob-emr-test/dataplatform/rtdmp_request/2021/07/10/dsp_req/com.taobao.idlefish_bes/*/
+      if (FileSystem.get(new URI(s"${pathUri.getScheme}://${pathUri.getHost}"), sc.hadoopConfiguration)
+        .exists(new Path(pathUri.toString.replace("*", "")))) {
+        inputDataRdd = inputDataRdd.union(spark.sparkContext.textFile(inputPath).map(row => {
+          if (row.length == 32) {
+            DmpDailyDataInformation(row, device_type_md5, platform, package_name, country_code)
+          }
+          else {
+            DmpDailyDataInformation(row, device_type_not_md5, platform, package_name, country_code)
+          }
+        }
+        ))
+      } else {
+        println(inputPath + " not existed!")
+        inputDataRdd = inputDataRdd.union(spark.sparkContext.emptyRDD[DmpDailyDataInformation])
+      }
     }
     val df: DataFrame = inputDataRdd.toDF().persist(StorageLevel.MEMORY_AND_DISK_SER)
......
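The fix guards each spark.sparkContext.textFile call with a Hadoop FileSystem.exists check (with wildcards stripped first, since exists does not expand globs) and unions an empty RDD when the path is missing, instead of letting the read fail on a non-existent S3 prefix. Below is a minimal standalone sketch of that pattern, under stated assumptions: DeviceRow and readIfExists are hypothetical names standing in for the job's DmpDailyDataInformation record and the surrounding loop, and the path is assumed to be a scheme://bucket style URI as in the commit.

import java.net.URI

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

// Hypothetical stand-in for the job's DmpDailyDataInformation case class.
case class DeviceRow(deviceId: String, deviceType: String)

object GuardedRead {

  // Read inputPath only if the filesystem reports it as existing; otherwise
  // log and return an empty RDD so the caller's running union stays valid.
  // Wildcards are stripped before the check because FileSystem.exists does
  // not expand globs (the same trick the commit applies via replace("*", "")).
  def readIfExists(spark: SparkSession, inputPath: String): RDD[DeviceRow] = {
    val sc = spark.sparkContext
    val pathUri = new URI(inputPath)
    // Resolve the filesystem for the bucket (e.g. s3://mob-emr-test), as in the commit.
    val fs = FileSystem.get(
      new URI(s"${pathUri.getScheme}://${pathUri.getHost}"),
      sc.hadoopConfiguration)

    if (fs.exists(new Path(pathUri.toString.replace("*", "")))) {
      sc.textFile(inputPath).map { row =>
        // A 32-character row is treated as an already-hashed (MD5) device id.
        if (row.length == 32) DeviceRow(row, "md5") else DeviceRow(row, "raw")
      }
    } else {
      println(s"$inputPath does not exist, skipping")
      sc.emptyRDD[DeviceRow]
    }
  }
}

In the job itself this guard sits inside the loop that accumulates inputDataRdd, and the empty-RDD branch keeps the subsequent toDF() and persist calls well-formed even when some input paths are absent.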