#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @author : Liu Kai
# @date   : 2018-02-18
# @desc   : Parse the fluentd joypc_sdk daily data into the etl daily table
# # # # # # # # # # # # # # # # # # # # # #

# Shared job environment. This script reads ScheduleTime, JOYPAC_DAILY_LOG_PATH
# and JAR and calls mount_partition, none of which are defined here, so they
# are expected to come from this file (or from the caller's environment).
# `.` is the POSIX spelling of `source`, which plain /bin/sh may not provide.
. ../dmp_env.sh

# Abort early if the values the destructive steps below depend on are missing.
: "${JOYPAC_DAILY_LOG_PATH:?JOYPAC_DAILY_LOG_PATH must be set (expected from dmp_env.sh)}"
: "${JAR:?JAR must be set (expected from dmp_env.sh)}"

# The job processes the day before ScheduleTime. Resolve that date once so all
# derived values agree with each other.
if ! date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d"); then
  echo "cannot compute the processing date from ScheduleTime='$ScheduleTime'" >&2
  exit 255
fi

year=${date_path%%/*}   # YYYY
day=${date_path##*/}    # DD
month=${date_path#*/}   # MM/DD ...
month=${month%/*}       # ... -> MM
# year/month/day are not used below; kept in case sourced helpers read them.
LOG_TIME="${year}${month}${day}"   # YYYYMMDD

# The wildcards are meant for Spark/Hadoop, not for the local shell, so this
# value must stay quoted wherever it is used.
INPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/stage_joypac_upload_daily/$date_path/*/*.gz"
## OUTPUT_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_joypc_sdk_daily/$date_path"
OUTPUT_PATH="${JOYPAC_DAILY_LOG_PATH}/$date_path"

# Clear any previous output for this day. The exit status is deliberately not
# checked: the path may simply not exist yet. `:?` refuses to run the recursive
# delete with an empty path.
hadoop fs -rm -r "${OUTPUT_PATH:?}/"

if ! spark-submit --class mobvista.dmp.datasource.joypac.JoypcSdkDaily \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --master yarn \
  --deploy-mode cluster \
  --name joypc_sdk_daily \
  --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml \
  --conf "spark.app.loadTime=${LOG_TIME}" \
  --conf "spark.app.input_path=${INPUT_PATH}" \
  --conf "spark.app.output_path=${OUTPUT_PATH}" \
  --executor-memory 6g \
  --driver-memory 4g \
  --executor-cores 2 \
  --num-executors 30 \
  "../${JAR}"; then
  # Do not mount a partition over missing or partial output.
  exit 255
fi

# mount_partition is not defined in this file; presumably it attaches
# OUTPUT_PATH as the day=LOG_TIME partition of etl_joypc_sdk_daily - verify
# against dmp_env.sh. Its exit status becomes the script's exit status.
mount_partition "etl_joypc_sdk_daily" "day='${LOG_TIME}'" "$OUTPUT_PATH"