pre_create_tables.sh 1.28 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#!/bin/bash
# Runs pre_create_tables.sql through Hive for the day before a reference date.
#
# The reference date is taken from $ScheduleTime when that is set and
# non-empty, otherwise from the first positional argument. It is passed to
# `date -d`, so it must be in a form that `date` can parse.
#
# NOTE(review): hive_func is not defined in this file; presumably it is
# provided by one of the scripts sourced below -- confirm.
source ./common/tools.sh
source ../dmp_env.sh

today=${ScheduleTime:-$1}

echo "ScheduleTime=$ScheduleTime"
echo "today=$today"

# The same target day (one day before $today) in three separator styles.
# Abort if `date` rejects the input: continuing with empty values would
# produce truncated paths further down the pipeline.
date_path=$(date +%Y/%m/%d -d "-1 day $today") || exit 255
date_str_undline=$(date +%Y_%m_%d -d "-1 day $today") || exit 255
date_str_midline=$(date +%Y-%m-%d -d "-1 day $today") || exit 255

HIVE_CMD=$(hive_func)

# $HIVE_CMD stays unquoted, as in the original, so that a command line
# containing several words is split into the command and its arguments.
# A failing Hive run stops the script with the same status (255) that the
# script uses for its other failure path.
if ! $HIVE_CMD -v \
  -hivevar prefix="$date_path" \
  -hivevar date_str_undline="$date_str_undline" \
  -hivevar date_str_midline="$date_str_midline" \
  -f pre_create_tables.sql; then
  echo "ERROR: hive failed while running pre_create_tables.sql" >&2
  exit 255
fi

18 19
# Input location for the target day inside the ga-annotated-data bucket.
# Alternate bucket, currently disabled:
#   INPUT_PATH="s3://live-ga-rawdata-annotated/${date_path}"
INPUT_PATH="s3://ga-annotated-data/"${date_path}
wang-jinfeng committed
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
# Submits the GaParser Spark job that reads ${INPUT_PATH} and writes to
# ${GA_ODS_RAW_DATA_PATH}/${date_path}. Exits with status 255 when the
# submission fails or when a required variable is missing.
#
# NOTE(review): GA_ODS_RAW_DATA_PATH and JAR are not assigned in this part of
# the file; presumably they come from a sourced environment script -- confirm.

# Refuse to launch the cluster job with an empty input: an empty value would
# yield a truncated path or shift the job's command-line arguments.
for required_var in GA_ODS_RAW_DATA_PATH JAR date_path date_str_midline INPUT_PATH; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "ERROR: required variable ${required_var} is empty or unset" >&2
    exit 255
  fi
done

OUTPUT_PATH="${GA_ODS_RAW_DATA_PATH}/${date_path}"

# Test the command directly rather than inspecting $? afterwards, and quote
# every expansion so each value reaches spark-submit as exactly one argument.
if ! spark-submit --class mobvista.dmp.datasource.ga.GaParser \
  --name "mobvista.dmp.datasource.ga.GaParser" \
  --conf spark.yarn.executor.memoryOverhead=2048 \
  --conf spark.network.timeout=720s \
  --conf spark.default.parallelism=1000 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
  --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
  --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 100 \
  "../${JAR}" -date_str_midline "${date_str_midline}" -input "${INPUT_PATH}" -output "${OUTPUT_PATH}" -coalesce 4000; then
  exit 255
fi