#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @author: fengliang
# @date  : 2017-07-31
# @desc  : Fetch bundle info from S3 that has no appleId match yet, then
#          query the App Store API to match each bundle to an appleId.
# # # # # # # # # # # # # # # # # # # # # #

# Shared job environment: defines ScheduleTime, JAR, UNMATCH_DATA_PATH,
# DISTINCT_BUNDLE_PATH, BUNDLE_PACKAGE_MAPPING_PATH, check_await, ...
source ../dmp_env.sh

# NOTE: "-d ... ago" is GNU date syntax; this script requires GNU coreutils.
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")      # T-1 partition
yestoday_path=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")  # T-2 partition

INPUT_PATH="${UNMATCH_DATA_PATH}/${yestoday_path}/*"
OUTPUT_PATH="${DISTINCT_BUNDLE_PATH}/${date_path}"
OLD_BUNDLE_MAPPING_PATH="${BUNDLE_PACKAGE_MAPPING_PATH}/${yestoday_path}"
NEW_BUNDLE_MAPPING_PATH="${BUNDLE_PACKAGE_MAPPING_PATH}/${date_path}"

# Only run the dedup job when yesterday's unmatched data actually exists.
hadoop fs -test -e "$INPUT_PATH"
if [ $? -eq 0 ]; then
  # Block until yesterday's mapping job finished (its _SUCCESS marker exists).
  check_await "${OLD_BUNDLE_MAPPING_PATH}/_SUCCESS"

  # Re-runs must start from a clean output directory.
  hadoop fs -rm -r "$OUTPUT_PATH"

  # Deduplicate the unmatched bundle ids into OUTPUT_PATH.
  spark-submit --master yarn --deploy-mode cluster \
    --executor-memory 4g --driver-memory 4g \
    --executor-cores 2 --num-executors 15 \
    --conf spark.yarn.executor.memoryOverhead=2048M \
    --class mobvista.dmp.main.FindUnmatchBundle \
    ../"${JAR}" -input "$INPUT_PATH" -output "$OUTPUT_PATH"
  if [ $? -ne 0 ]; then
    exit 255
  fi
fi

# Merge today's distinct-bundle output into one local file.  If getmerge
# fails (no new bundle data today), carry yesterday's mapping forward
# unchanged and exit successfully — downstream still gets a valid mapping.
bundel_file="./bundleFile.txt"
hadoop fs -getmerge "$OUTPUT_PATH" "$bundel_file"
if [ $? -ne 0 ]; then
  hadoop fs -mkdir -p "$NEW_BUNDLE_MAPPING_PATH"
  hadoop fs -cp "$OLD_BUNDLE_MAPPING_PATH"/* "$NEW_BUNDLE_MAPPING_PATH"
  echo "bundle数据不存在,程序正常退出"
  exit 0
fi

echo "共$(wc -l "$bundel_file" | awk '{print $1}') 个bundle"

# Split the bundle file; hand each chunk to its own process.
# NOTE: everything between ": '" and the closing "'" below is DISABLED —
# it is a single-quoted argument to the no-op ':' command, kept as a
# reference copy of the old flow (split -> per-chunk MatchTrackId -> concat).
# NOTE(review): scala_libary is assigned only inside this dead block, so the
# later ${scala_libary:1} expansion is empty — confirm that is intended.
: '
split -l 100 $bundel_file bundlePrefix
if [ $? -ne 0 ];then
  exit 255
fi

input=""
files=`ls bundlePrefix*`
for f in $files;do
  input="${input};$f"
done

# 请求App Store接口,配配appleId

output="./data"
scala_jars=`ls /data/hadoop-alternative/spark/jars/scala*.jar`
scala_libary=""
for j in $scala_jars;do
  scala_libary="$scala_libary:$j"
done
java -cp ../${JAR}:`hadoop classpath`:${scala_libary:1} mobvista.dmp.demo.MatchTrackId -input ${input:1} -output $output
if [ $? -ne 0 ];then
  exit 255
fi

match_file="./match.txt"
cat ./data* > $match_file
'

# Live matching path: BundleMatchServer reads bundle ids from ${input} and
# writes appleId matches to ${output}.
data_dir=$(pwd)
input="${data_dir}/${bundel_file}"
match_file="./match.txt"
output="${data_dir}/${match_file}"

java -cp ../"${JAR}" mobvista.dmp.datasource.bundle.service.BundleMatchServer "${input}" "${output}"
if [ $? -ne 0 ]; then
  # Bug fix: this step previously had no status check, so a failed match run
  # silently fell through to the merge below with a missing/partial file.
  exit 255
fi

# Pull yesterday's bundle->package mapping down for the merge step.
old_mapping_file="./mapping.txt"
hadoop fs -getmerge "$OLD_BUNDLE_MAPPING_PATH" "$old_mapping_file"
if [ $? -ne 0 ]; then
  exit 255
fi

# Merge today's matches into the historical mapping.
# NOTE(review): scala_libary is only set inside the disabled block above, so
# ${scala_libary:1} expands empty here and the classpath is just ../${JAR}.
result_file="./result.txt"
java -cp ../"${JAR}:${scala_libary:1}" mobvista.dmp.main.MergeBundlePkg "$old_mapping_file" "$match_file" "$result_file"
if [ $? -ne 0 ]; then
  exit 255
fi

# Publish the merged mapping as today's partition, replacing any stale files
# left by an earlier failed attempt.
hadoop fs -test -e "$NEW_BUNDLE_MAPPING_PATH"
if [ $? -ne 0 ]; then
  hadoop fs -mkdir -p "$NEW_BUNDLE_MAPPING_PATH"
else
  hadoop fs -rm "$NEW_BUNDLE_MAPPING_PATH"/*
fi

hadoop fs -put "$result_file" "$NEW_BUNDLE_MAPPING_PATH"
if [ $? -ne 0 ]; then
  exit 255
fi

# The _SUCCESS marker signals downstream jobs that this partition is complete.
hadoop fs -touchz "$NEW_BUNDLE_MAPPING_PATH/_SUCCESS"
if [ $? -ne 0 ]; then
  exit 255
fi

# Clean up local scratch files ('--' guards against option-like names).
rm -- "$bundel_file" "$output" "$old_mapping_file" "$result_file"

exit 0