#!/bin/sh

# # # # # # # # # # # # # # # # # # # # # #
# @author: fengliang
# @date  : 2017-07-31
# @desc  : Collect the bundle records on S3 that have no matched appleId yet,
#          then query the App Store interface to match an appleId for them
#          and publish the refreshed bundle -> package mapping.
#
# Inputs (expected from ../dmp_env.sh or the scheduler environment):
#   ScheduleTime                 reference time of this scheduled run
#   UNMATCH_DATA_PATH            root of the unmatched-bundle input data
#   DISTINCT_BUNDLE_PATH         root for the de-duplicated bundle output
#   BUNDLE_PACKAGE_MAPPING_PATH  root of the dated bundle/package mappings
#   JAR                          job jar file name, resolved relative to ..
#   check_await                  helper invoked on the previous _SUCCESS marker
#
# Exit status: 0 on success (or when there is no bundle data to process),
#              255 when any mandatory step fails.
# # # # # # # # # # # # # # # # # # # # # #

# `.` is the POSIX spelling of `source`; the script declares #!/bin/sh.
. ../dmp_env.sh

date_path=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")
prev_date_path=$(date -d "$ScheduleTime 2 days ago" +"%Y/%m/%d")

# The trailing /* is a glob meant for Hadoop, so every use below is quoted to
# keep the local shell from expanding it.
INPUT_PATH="${UNMATCH_DATA_PATH}/${prev_date_path}/*"
OUTPUT_PATH="${DISTINCT_BUNDLE_PATH}/${date_path}"
OLD_BUNDLE_MAPPING_PATH="${BUNDLE_PACKAGE_MAPPING_PATH}/${prev_date_path}"
NEW_BUNDLE_MAPPING_PATH="${BUNDLE_PACKAGE_MAPPING_PATH}/${date_path}"

# Step 1: extract the distinct unmatched bundles, only when input data exists.
if hadoop fs -test -e "$INPUT_PATH"; then
  check_await "$OLD_BUNDLE_MAPPING_PATH/_SUCCESS"

  # Clear any output of a previous attempt; a failure here (nothing to
  # remove) is not an error.
  hadoop fs -rm -r "$OUTPUT_PATH"

  # Find the bundles that are still unmatched.
  if ! spark-submit --master yarn --deploy-mode cluster \
    --executor-memory 4g --driver-memory 4g \
    --executor-cores 2 --num-executors 15 \
    --conf spark.yarn.executor.memoryOverhead=2048M \
    --class mobvista.dmp.main.FindUnmatchBundle "../${JAR}" \
    -input "$INPUT_PATH" -output "$OUTPUT_PATH"; then
    exit 255
  fi
fi

# Step 2: pull the bundle list to local disk. When there is nothing to fetch,
# carry the previous mapping forward unchanged and finish successfully.
bundle_file="./bundleFile.txt"
if ! hadoop fs -getmerge "$OUTPUT_PATH" "$bundle_file"; then
  hadoop fs -mkdir -p "$NEW_BUNDLE_MAPPING_PATH"
  hadoop fs -cp "$OLD_BUNDLE_MAPPING_PATH/*" "$NEW_BUNDLE_MAPPING_PATH"
  echo "bundle数据不存在,程序正常退出"
  exit 0
fi

echo "共$(wc -l "$bundle_file" | awk '{print $1}') 个bundle"

# ---------------------------------------------------------------------------
# Disabled legacy implementation, kept for reference only (never executed).
# It split the bundle file so that each chunk was handled by its own worker
# and then queried the App Store interface through MatchTrackId.
#
#   split -l 100 $bundle_file bundlePrefix
#   if [ $? -ne 0 ];then
#     exit 255
#   fi
#
#   input=""
#   files=`ls bundlePrefix*`
#   for f in $files;do
#     input="${input};$f"
#   done
#
#   output="./data"
#   java -cp ../${JAR}:`hadoop classpath`:<scala jars> \
#     mobvista.dmp.demo.MatchTrackId -input ${input:1} -output $output
#   if [ $? -ne 0 ];then
#     exit 255
#   fi
#
#   match_file="./match.txt"
#   cat ./data* > $match_file
# ---------------------------------------------------------------------------

# Scala runtime jars for the merge step. The list used to be assembled only
# inside the disabled block above, which left the variable empty at its point
# of use. Built with a plain glob (no `ls` parsing, no bash-only substring
# expansion); stays empty when no jar matches.
scala_classpath=""
for scala_jar in /data/hadoop-alternative/spark/jars/scala*.jar; do
  [ -e "$scala_jar" ] || continue
  scala_classpath="${scala_classpath:+${scala_classpath}:}${scala_jar}"
done

# Step 3: match appleIds for the bundles. The server is handed absolute paths.
data_dir=$(pwd)
match_file="./match.txt"
match_input="${data_dir}/${bundle_file}"
match_output="${data_dir}/${match_file}"
if ! java -cp "../${JAR}" \
  mobvista.dmp.datasource.bundle.service.BundleMatchServer \
  "$match_input" "$match_output"; then
  # Previously unchecked: a failed match must not be merged into the mapping.
  exit 255
fi

# Step 4: fetch the previous mapping.
old_mapping_file="./mapping.txt"
if ! hadoop fs -getmerge "$OLD_BUNDLE_MAPPING_PATH" "$old_mapping_file"; then
  exit 255
fi

# Step 5: merge the bundle -> package information into one result file.
result_file="./result.txt"
if ! java -cp "../${JAR}:${scala_classpath}" mobvista.dmp.main.MergeBundlePkg \
  "$old_mapping_file" "$match_file" "$result_file"; then
  exit 255
fi

# Step 6: publish the result, replacing whatever the target directory holds.
if hadoop fs -test -e "$NEW_BUNDLE_MAPPING_PATH"; then
  hadoop fs -rm "$NEW_BUNDLE_MAPPING_PATH/*"
else
  hadoop fs -mkdir -p "$NEW_BUNDLE_MAPPING_PATH"
fi

if ! hadoop fs -put "$result_file" "$NEW_BUNDLE_MAPPING_PATH"; then
  exit 255
fi

# The _SUCCESS marker is what the next run's check_await call is given.
if ! hadoop fs -touchz "$NEW_BUNDLE_MAPPING_PATH/_SUCCESS"; then
  exit 255
fi

# Remove the local scratch files of this run.
rm "$bundle_file" "$match_output" "$old_mapping_file" "$result_file"
exit 0