#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # #
# @file  : install_list_eggplants.sh
# @author: houying
# @date  : 17-3-14
# # # # # # # # # # # # # # # # # # # # # #

# Pull in shared environment configuration (expected to define $ScheduleTime,
# $JAR, and the TMP_EGGPLANTS_* HDFS path variables used below).
source ../dmp_env.sh

# Operate on the day before the scheduled time; split it into Y/M/D parts
# for building dated HDFS paths.
LOG_TIME=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")
year=${LOG_TIME:0:4}
month=${LOG_TIME:4:2}
day=${LOG_TIME:6:2}

DEVICE_PATH="$TMP_EGGPLANTS_INPUT_PATH/$year/$month/$day"
OUTPUT_PATH="$TMP_EGGPLANTS_OUTPUT_PATH/$year/$month/$day"
PROGRESS_PATH="$TMP_EGGPLANTS_PROGRESS_PATH/$year/$month/$day"

# Recreate the output directory. '-f' suppresses the error on the very first
# run, when $OUTPUT_PATH does not exist yet.
hadoop fs -rm -r -f "$OUTPUT_PATH"
hadoop fs -mkdir -p "$OUTPUT_PATH"
hadoop fs -mkdir -p "$PROGRESS_PATH"

# $1 - batch number (which iteration this is)
# $2 - s3 file to process
# The task below is disabled: it is kept inside a quoted here-doc fed to ':'
# so none of it executes.
: <<'COMMENT'
function http_catch_eggplants() {
    local num="$1"
    local file="$2"
    local local_file="result_$num.txt"
    echo "processing $file into $local_file ..."
    hadoop fs -get "$file" "s3_${num}.txt"
    echo "[start at $(date "+%Y-%m-%d %H:%M:%S")]"
    java -cp ../${JAR} mobvista.dmp.datasource.eggplants.HttpCatchEggplants "s3_$num.txt" "$local_file" "package.list"
    # Only upload when the catcher actually produced output lines.
    if [[ "$(cat ${local_file}|wc -l)" -ne "0" ]]
    then
        hadoop fs -rm "$OUTPUT_PATH/$local_file"
        hadoop fs -put "$local_file" "$OUTPUT_PATH"
    fi
    rm "s3_$num.txt" "$local_file"
    echo "[end at $(date "+%Y-%m-%d %H:%M:%S")]"
}

num=0
# Previously-completed files, used to skip work already done.
PROGRESS="$(hadoop fs -text "${PROGRESS_PATH}/*")"
# NOTE(review): 'hadoop fs -ls' prints the path in column 8 (perm, repl,
# owner, group, size, date, time, path); $6 selects the date column --
# confirm the intended column before ever re-enabling this block.
for file in $(hadoop fs -ls "$DEVICE_PATH" | awk -v'FS= +' '{print $6}')
do
    # Skip files already recorded in the progress markers.
    if [[ ! -z ${PROGRESS} ]] && [[ ! -z "$(grep "$file" <<< "$PROGRESS")" ]]
    then
        num=$(($num+1))
        continue
    fi
    http_catch_eggplants ${num} ${file}
    # Record a per-file progress marker so a rerun can resume.
    echo "$file" > "${num}_DONE"
    hadoop fs -put "${num}_DONE" "$PROGRESS_PATH"
    # Stop once the wall-clock day has rolled past the scheduled day.
    if [[ "$(date "+%Y%m%d")" != "$(date -d "$ScheduleTime" "+%Y%m%d")" ]]
    then
        break
    fi
    num=$(($num+1))
done
COMMENT