#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @author : Feng Liang
# @date   : 2017-08-15
# @desc   : Parse ADN preemptive-send data (also known as probe data).
#
# Current behavior: the Spark parsing step is disabled (see the commented
# block below). The job only creates the daily output directory, drops a
# _SUCCESS marker into it and mounts it as a partition of
# etl_adn_clever_daily.
#
# Inputs read from the environment:
#   ScheduleTime       - reference time; the job processes the day before it
#                        (presumably injected by the scheduler - TODO confirm)
#   CLEVER_NGINX_LOG   - root of the raw nginx logs
#   CLEVER_DAILY_PATH  - root of the daily output
#   (the two paths are presumably defined by ../dmp_env.sh - TODO confirm)
# # # # # # # # # # # # # # # # # # # # # #

# `.` is the POSIX spelling of `source`; `source` is not available when
# /bin/sh is dash/ash.
. ../dmp_env.sh

# The same day (ScheduleTime minus one day) rendered in three formats.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
# NOTE(review): dt is not referenced by any live command in this file; kept
# in case helpers sourced from dmp_env.sh read it - TODO confirm.
dt=$(date -d "$ScheduleTime 1 days ago" +"%Y-%m-%d")
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")

# Earlier input pattern that selected files by host IP prefix (kept for reference):
## INPUT_PATH="${CLEVER_NGINX_LOG}/$date_path/*/*/{35.172.138.115-*,54.197.195.161-*,54.254.211.147-*,54.255.194.253-*,18.184.191.23-*,18.197.98.24-*}"
# INPUT_PATH is only consumed by the disabled Spark step below.
INPUT_PATH="${CLEVER_NGINX_LOG}/$date_path/*/*/{net.access.vg_adn_netGoCleverjp_*,net.access.sg_adn_netGoCleverjp_*,net.access.fk_adn_netGoCleverjp*}"
OUTPUT_PATH="${CLEVER_DAILY_PATH}/$date_path"

# ---------------------------------------------------------------------------
# DISABLED: wait for the upstream logs, clear the output directory and run the
# Spark parser. Left here, commented out, so it can be re-enabled.
#
# check_await "${CLEVER_NGINX_LOG}/$date_path/virginia/23/_SUCCESS"
#
# hadoop fs -rm -r "$OUTPUT_PATH/"
#
# spark-submit --class mobvista.dmp.datasource.clever.ParseCleverDaily \
#   --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \
#   --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 40 \
#   "../${JAR}" -input "$INPUT_PATH" -output "$OUTPUT_PATH" -parallelism 100 -coalesce 20
#
# if [ $? -ne 0 ];then
#   exit 255
# fi
# ---------------------------------------------------------------------------

# With parsing disabled, create the output directory and its _SUCCESS marker
# directly. Abort with 255 (the exit code the disabled Spark step uses) rather
# than mounting a partition whose location could not be prepared.
if ! hadoop fs -mkdir -p "${OUTPUT_PATH}"; then
  echo "failed to create output directory: ${OUTPUT_PATH}" >&2
  exit 255
fi

if ! hadoop fs -touchz "${OUTPUT_PATH}/_SUCCESS"; then
  echo "failed to create marker: ${OUTPUT_PATH}/_SUCCESS" >&2
  exit 255
fi

# The escaped backticks pass a literal `date`='<LOG_TIME>' partition spec.
# mount_partition is not defined in this file (presumably provided by
# dmp_env.sh - TODO confirm); its exit status becomes the script's.
mount_partition "etl_adn_clever_daily" "\`date\`='${LOG_TIME}'" "$OUTPUT_PATH"