#!/bin/bash
#
# Daily GA raw-data job: pre-creates the Hive tables for the processing day,
# then submits the mobvista.dmp.datasource.ga.GaParser Spark application.
#
# Usage:   <script> [date]
# Input:   the reference date comes from $ScheduleTime when it is set and
#          non-empty, otherwise from the first positional argument. The day
#          that gets processed is the day BEFORE that reference date.
# Needs:   hive_func, GA_ODS_RAW_DATA_PATH and JAR, none of which are defined
#          in this script (presumably provided by the sourced files below;
#          verify there).
# Exit:    255 when the date cannot be derived or spark-submit fails.

# Both paths are relative, so the script has to be started from its own
# directory.
source ./common/tools.sh
source ../dmp_env.sh

today=${ScheduleTime:-$1}
echo "ScheduleTime=$ScheduleTime"
# The "toady" spelling is kept as-is in case log consumers match on it.
echo "toady=$today"

# Resolve the processing day once and derive the other spellings from it.
# Abort on an unparsable date: an empty date would make the job read from the
# bucket root and write to the root of the output location.
if ! date_str_midline=$(date +%Y-%m-%d -d "-1 day $today"); then
  echo "cannot derive the processing date from '$today'" >&2
  exit 255
fi
date_path=${date_str_midline//-//}        # YYYY/MM/DD
date_str_undline=${date_str_midline//-/_} # YYYY_MM_DD

HIVE_CMD=$(hive_func)

# HIVE_CMD is expanded unquoted on purpose so that a command string carrying
# its own arguments is split into separate words.
# A failure here does not stop the job (same flow as before); it is only
# reported on stderr.
# shellcheck disable=SC2086
if ! $HIVE_CMD -v \
  -hivevar prefix="$date_path" \
  -hivevar date_str_undline="$date_str_undline" \
  -hivevar date_str_midline="$date_str_midline" \
  -f pre_create_tables.sql; then
  echo "warning: pre_create_tables.sql failed for $date_str_midline" >&2
fi

# INPUT_PATH="s3://live-ga-rawdata-annotated/${date_path}"
INPUT_PATH="s3://ga-annotated-data/${date_path}"
OUTPUT_PATH="${GA_ODS_RAW_DATA_PATH}/${date_path}"

if ! spark-submit --class mobvista.dmp.datasource.ga.GaParser \
  --name "mobvista.dmp.datasource.ga.GaParser" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism=1000 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 100 \
  "../${JAR}" -date_str_midline "${date_str_midline}" -input "${INPUT_PATH}" -output "${OUTPUT_PATH}" -coalesce 4000; then
  exit 255
fi