CreateORC.scala 1.69 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
package mobvista.dmp.demo

import org.apache.spark.sql.SparkSession

/**
 * Demo job that writes two hard-coded [[UserInfo]] rows to a fixed S3 location
 * in ORC format.
 */
class CreateORC{
  /**
   * Builds a two-row DataFrame of sample users and saves it as ORC.
   *
   * The Spark session obtained here is stopped before the method returns,
   * whether or not the write succeeds.
   *
   * @param args command-line arguments; not read by this job
   * @return always 0 when the write completes; a failed write propagates its exception
   */
  protected def run(args: Array[String]): Int = {
    val spark = SparkSession.builder().appName("test").getOrCreate()
    try {
      import spark.implicits._

      val sampleUsers = Seq(
        UserInfo("did1", "gaid", "android", "US", "18-24", "f", "tags1"),
        UserInfo("did2", "idfa", "ios", "CN", "25-44", "m", "tags2")
      )
      val outputPath = "s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/ods_dmp_user_info/2017/09/27"

      // No save mode is set, so the writer's default applies
      // (per Spark docs: error if the target path already exists).
      spark.sparkContext
        .parallelize(sampleUsers)
        .toDF()
        .write
        .format("ORC")
        .save(outputPath)
      0
    } finally {
      // Release the session's resources even when the write throws.
      spark.stop()
    }
  }
  /*
  Commented-out sketch of an upsert ("merge") of new user rows into an existing
  user table, matching rows on device_id and device_type. `userTable` and
  `userDF1` are not defined anywhere in this file.
  NOTE(review): `new.tag` presumably should be `new.tags` to match UserInfo — confirm.

  userTable.as("old").toDF.merge(
    userDF1.as("new"),
    "old.device_id = new.device_id AND old.device_type = new.device_type"
  ).whenMatched.updateExpr(Map("platform" -> "new.platform","country"->"new.country","age"->"new.age","gender"->"new.gender","tags"->"new.tag"))
    .whenNotMatched.insertExpr(Map("device_id"->"new.device_id","device_type"->"new.device_type","platform" -> "new.platform","country"->"new.country","age"->"new.age","gender"->"new.gender","tags"->"new.tag")).execute()
  */
}

/*
Sample user-info rows (columns match the fields of UserInfo):

+---------+-----------+--------+-------+-----+------+-----+
|device_id|device_type|platform|country|  age|gender| tags|
+---------+-----------+--------+-------+-----+------+-----+
|     did3|       imei| android|     US|18-24|     f|tags1|
|     did1|       gaid| android|     CN|25-40|     f|tags2|
|     did2|       idfa|     ios|     CN|25-44|     m|tags2|
+---------+-----------+--------+-------+-----+------+-----+
 */

/** Command-line entry point for the [[CreateORC]] demo job. */
object CreateORC{
  /**
   * Instantiates the job and runs it with the given arguments.
   *
   * @param args command-line arguments, passed through unchanged to `run`
   */
  def main(args: Array[String]): Unit = {
    val job = new CreateORC
    // The companion object may call the protected `run`; its Int result is not used.
    job.run(args)
    ()
  }
}

/**
 * One user-info row; every column is stored as a plain string.
 *
 * @param device_id   device identifier (sample values in this file: "did1", "did2")
 * @param device_type kind of identifier (sample values: "gaid", "idfa")
 * @param platform    device platform (sample values: "android", "ios")
 * @param country     country code (sample values: "US", "CN")
 * @param age         age bracket (sample values: "18-24", "25-44")
 * @param gender      gender flag (sample values: "f", "m")
 * @param tags        tag string (sample values: "tags1", "tags2")
 */
case class UserInfo(
  device_id: String,
  device_type: String,
  platform: String,
  country: String,
  age: String,
  gender: String,
  tags: String
)