#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @author: fengliang
# @date  : 2017-07-31
# @desc  : Fetch bundle info from S3 that has no appleId match yet, then
#          query the App Store API to match each bundle to an appleId.
# # # # # # # # # # # # # # # # # # # # # #

# Shared job environment: defines ScheduleTime, JAR, UNMATCH_DATA_PATH,
# DISTINCT_BUNDLE_PATH, BUNDLE_PACKAGE_MAPPING_PATH, check_await, ...
source ../dmp_env.sh

# NOTE: "-d ... ago" is GNU date syntax; this script requires GNU coreutils.
date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")      # T-1 partition
yestoday_path=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")  # T-2 partition

INPUT_PATH="${UNMATCH_DATA_PATH}/${yestoday_path}/*"
OUTPUT_PATH="${DISTINCT_BUNDLE_PATH}/${date_path}"
OLD_BUNDLE_MAPPING_PATH="${BUNDLE_PACKAGE_MAPPING_PATH}/${yestoday_path}"
NEW_BUNDLE_MAPPING_PATH="${BUNDLE_PACKAGE_MAPPING_PATH}/${date_path}"

# Only run the dedup job when yesterday's unmatched data actually exists.
hadoop fs -test -e "$INPUT_PATH"
if [ $? -eq 0 ]; then
  # Block until yesterday's mapping job finished (its _SUCCESS marker exists).
  check_await "${OLD_BUNDLE_MAPPING_PATH}/_SUCCESS"

  # Re-runs must start from a clean output directory.
  hadoop fs -rm -r "$OUTPUT_PATH"

  # Deduplicate the unmatched bundle ids into OUTPUT_PATH.
  spark-submit --master yarn --deploy-mode cluster \
    --executor-memory 4g --driver-memory 4g \
    --executor-cores 2 --num-executors 15 \
    --conf spark.yarn.executor.memoryOverhead=2048M \
    --class mobvista.dmp.main.FindUnmatchBundle \
    ../"${JAR}" -input "$INPUT_PATH" -output "$OUTPUT_PATH"
  if [ $? -ne 0 ]; then
    exit 255
  fi
fi

# Merge today's distinct-bundle output into one local file.  If getmerge
# fails (no new bundle data today), carry yesterday's mapping forward
# unchanged and exit successfully — downstream still gets a valid mapping.
bundel_file="./bundleFile.txt"
hadoop fs -getmerge "$OUTPUT_PATH" "$bundel_file"
if [ $? -ne 0 ]; then
  hadoop fs -mkdir -p "$NEW_BUNDLE_MAPPING_PATH"
  hadoop fs -cp "$OLD_BUNDLE_MAPPING_PATH"/* "$NEW_BUNDLE_MAPPING_PATH"
  echo "bundle数据不存在,程序正常退出"
  exit 0
fi

echo "共$(wc -l "$bundel_file" | awk '{print $1}') 个bundle"

# Split the bundle file; hand each chunk to its own process.
# NOTE: everything between ": '" and the closing "'" below is DISABLED —
# it is a single-quoted argument to the no-op ':' command, kept as a
# reference copy of the old flow (split -> per-chunk MatchTrackId -> concat).
# NOTE(review): scala_libary is assigned only inside this dead block, so the
# later ${scala_libary:1} expansion is empty — confirm that is intended.
: '
split -l 100 $bundel_file bundlePrefix
if [ $? -ne 0 ];then
  exit 255
fi

input=""
files=`ls bundlePrefix*`
for f in $files;do
  input="${input};$f"
done

# 请求App Store接口,配配appleId

output="./data"
scala_jars=`ls /data/hadoop-alternative/spark/jars/scala*.jar`
scala_libary=""
for j in $scala_jars;do
  scala_libary="$scala_libary:$j"
done
java -cp ../${JAR}:`hadoop classpath`:${scala_libary:1} mobvista.dmp.demo.MatchTrackId -input ${input:1} -output $output
if [ $? -ne 0 ];then
  exit 255
fi

match_file="./match.txt"
cat ./data* > $match_file
'

# Live matching path: BundleMatchServer reads bundle ids from ${input} and
# writes appleId matches to ${output}.
data_dir=$(pwd)
input="${data_dir}/${bundel_file}"
match_file="./match.txt"
output="${data_dir}/${match_file}"

java -cp ../"${JAR}" mobvista.dmp.datasource.bundle.service.BundleMatchServer "${input}" "${output}"
if [ $? -ne 0 ]; then
  # Bug fix: this step previously had no status check, so a failed match run
  # silently fell through to the merge below with a missing/partial file.
  exit 255
fi

# Pull yesterday's bundle->package mapping down for the merge step.
old_mapping_file="./mapping.txt"
hadoop fs -getmerge "$OLD_BUNDLE_MAPPING_PATH" "$old_mapping_file"
if [ $? -ne 0 ]; then
  exit 255
fi

# Merge today's matches into the historical mapping.
# NOTE(review): scala_libary is only set inside the disabled block above, so
# ${scala_libary:1} expands empty here and the classpath is just ../${JAR}.
result_file="./result.txt"
java -cp ../"${JAR}:${scala_libary:1}" mobvista.dmp.main.MergeBundlePkg "$old_mapping_file" "$match_file" "$result_file"
if [ $? -ne 0 ]; then
  exit 255
fi

# Publish the merged mapping as today's partition, replacing any stale files
# left by an earlier failed attempt.
hadoop fs -test -e "$NEW_BUNDLE_MAPPING_PATH"
if [ $? -ne 0 ]; then
  hadoop fs -mkdir -p "$NEW_BUNDLE_MAPPING_PATH"
else
  hadoop fs -rm "$NEW_BUNDLE_MAPPING_PATH"/*
fi

hadoop fs -put "$result_file" "$NEW_BUNDLE_MAPPING_PATH"
if [ $? -ne 0 ]; then
  exit 255
fi

# The _SUCCESS marker signals downstream jobs that this partition is complete.
hadoop fs -touchz "$NEW_BUNDLE_MAPPING_PATH/_SUCCESS"
if [ $? -ne 0 ]; then
  exit 255
fi

# Clean up local scratch files ('--' guards against option-like names).
rm -- "$bundel_file" "$output" "$old_mapping_file" "$result_file"

exit 0