#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file    :ga_prepare.sh
# @author  :wangjf
# @revision:2018-11-06 14:41:53
#
# Submits the mobvista.dmp.datasource.ga.GaParser Spark job for the day
# preceding the given schedule time.
#
# Usage: ga_prepare.sh [ScheduleTime]
#   ScheduleTime  Any date string accepted by `date -d`. The job processes
#                 the day before it. When empty, `date` resolves
#                 "-1 day" relative to the current time.
#
# Requires: ./dmp_env.sh in the working directory, and a non-empty JAR
#           variable (presumably set by dmp_env.sh -- confirm).
# Exit status: that of spark-submit, or 1 on a pre-flight failure.
# # # # # # # # # # # # # # # # # # # # # #

# Strict mode (set -euo pipefail) is intentionally not enabled here: the
# sourced environment file is not known to be compatible with it.
source ./dmp_env.sh

ScheduleTime=$1

# Compact date (YYYYMMDD) of the day before ScheduleTime. Abort when the
# argument cannot be parsed; otherwise the S3 paths below would be built
# from empty components.
LOG_TIME=$(date +%Y%m%d -d "-1 day $ScheduleTime") || {
  printf 'ga_prepare.sh: cannot parse schedule time: %s\n' "$ScheduleTime" >&2
  exit 1
}

year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

# Same day in YYYY-MM-DD form. Derived from the parsed parts rather than a
# second `date` call so both representations always refer to the same day.
MID_TIME="${year}-${month}-${day}"

INPUT_PATH="s3://live-ga-rawdata-annotated/${year}/${month}/${day}"

# Disabled upstream-readiness check, kept for reference. It must stay below
# the INPUT_PATH assignment if it is ever re-enabled.
# check_await "${INPUT_PATH}/_SUCCESS"

# NOTE: replace with the production path (the output currently goes to a
# path under the mob-emr-test bucket).
OUTPUT_PATH="s3://mob-emr-test/wangjf/ga/${year}/${month}/${day}"

# The application jar is not defined in this file; fail early with a clear
# message instead of letting spark-submit misread its arguments.
: "${JAR:?JAR must be set before submitting the job}"

# Keep spark-submit as the last command so its exit status becomes the
# exit status of this script.
spark-submit --class mobvista.dmp.datasource.ga.GaParser \
  --name "mobvista.dmp.datasource.ga.GaParser" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.default.parallelism=2000 \
  --conf spark.speculation=true \
  --conf spark.speculation.quantile=0.9 \
  --conf spark.speculation.multiplier=1.2 \
  --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 180 \
  "${JAR}" -date_str_midline "${MID_TIME}" -input "${INPUT_PATH}" -output "${OUTPUT_PATH}" -coalesce 2000