#!/bin/bash

# # # # # # # # # # # # # # # # # # # # # #
# @file  : collect_package_name.sh
# @author: houying
# @date  : 16-11-14
# @desc  : Collect the list of packages that need to be crawled
# # # # # # # # # # # # # # # # # # # # # #

# Load the shared DMP environment. The path is relative to the current working
# directory, so the script has to be started from its own directory.
# NOTE(review): presumably this file provides hive_cmd, check_await, JAR and
# the *_PATH variables used below — confirm against dmp_env.sh.
source ../dmp_env.sh

# Absolute path of the directory containing this script. "$0" is quoted so a
# path with spaces survives, and pwd only runs when the cd succeeded.
BASE_PATH="$(cd "$(dirname "$0")" && pwd)"

# Partition being collected: one day before $ScheduleTime, as YYYYMMDD.
LOG_DATE=$(date -d "$ScheduleTime 1 days ago" "+%Y%m%d")

year=${LOG_DATE:0:4}
month=${LOG_DATE:4:2}
day=${LOG_DATE:6:2}

# Two days before $ScheduleTime, i.e. the day before LOG_DATE, as YYYYMMDD.
YESTERDAY=$(date -d "$ScheduleTime 2 days ago" "+%Y%m%d")
yes_year=${YESTERDAY:0:4}
yes_month=${YESTERDAY:4:2}
yes_day=${YESTERDAY:6:2}

# Output directory for the LOG_DATE partition, and the path handed to the
# install-list Spark job.
PACKAGE_PATH="${PACKAGE_TMP_PATH}/${year}/${month}/${day}"
PACKAGE_INSTALL_PATH="${INSTALL_PACKAGE_TMP_PATH}"

# Collect the (package_name, platform) pairs of recently updated campaigns.
#
# Globals read: DIM_ADN_CAMPAIGN, LOG_DATE, year, month, day, PACKAGE_PATH
# Arguments:    $1 - local output file
# Returns:      255 if the date computation or the Hive query fails (nothing
#               is uploaded in that case), otherwise the status of the upload.
campaign() {
  local out_file=$1
  local update_since
  local sql

  # Wait until the dim_adn_campaign partition for LOG_DATE is flagged complete.
  check_await "$DIM_ADN_CAMPAIGN/$year/$month/$day/_SUCCESS"

  # Only campaigns updated within the four days before LOG_DATE are selected.
  # Declaration and assignment are split so a date failure is not masked.
  update_since="$(date -d "$LOG_DATE 4 days ago" "+%Y-%m-%d")" || return 255

  sql="
  select package_name, platform
  from dim_adn_campaign
  where year='$year'
    and month='$month'
    and day='$day'
    and update_time != ''
    and update_time>='$update_since'
  group by package_name, platform
  "

  # Stop here on a query failure so a truncated file is never published.
  hive_cmd "use dwh;$sql;" >"${out_file}" || return 255

  hadoop fs -put "${out_file}" "$PACKAGE_PATH"
}

# Legacy Hive-based implementation of install(), kept for reference only.
# It is passed to the no-op ':' builtin as a here-document, so it never runs.
# The delimiter is quoted so the shell does not expand anything inside the
# disabled code.
: <<'!'
install() {
    check_await "$DM_INSTALL_LIST/$yes_year/$yes_month/$yes_day"
    hive_cmd "
    use dwh;
    select b.package_name, b.platform
    from (
       select t.package_name
       from dev.dm_package_black_list t
       where t.dt='${yes_year}${yes_month}${yes_day}'
    ) a
    right outer join (
      select package_name, platform
      from dm_install_list_v2
      where dt='${yes_year}${yes_month}${yes_day}'
      group by package_name, platform
    ) b on a.package_name=b.package_name
    where a.package_name is null
    ;
    " > "$1"
}
!

# Build the install-based package list with the CrawPkgsSpark job and publish
# it next to the other collected lists.
#
# Globals read: PACKAGE_INSTALL_PATH, DMP_INSTALL_LIST, yes_year, yes_month,
#               yes_day, YESTERDAY, JAR, PACKAGE_PATH
# Arguments:    $1 - local output file
# Exits the script with status 255 as soon as the Spark job, the download or
# the upload fails.
install() {
  local out_file=$1
  local install_list_path="${DMP_INSTALL_LIST}/${yes_year}/${yes_month}/${yes_day}/14days"

  # Never issue a recursive delete against an empty path.
  if [[ -z "${PACKAGE_INSTALL_PATH}" ]]; then
    echo "install: PACKAGE_INSTALL_PATH is empty" >&2
    exit 255
  fi

  # Clear the output of a previous run. 'rm -r' replaces the deprecated
  # '-rmr'; -f keeps a missing directory from being reported as an error.
  hadoop fs -rm -r -f "${PACKAGE_INSTALL_PATH}"

  # Wait until the 14-day install list for YESTERDAY is flagged complete.
  check_await "${install_list_path}/_SUCCESS"

  # NOTE(review): the job presumably writes its result under
  # PACKAGE_INSTALL_PATH, since that directory is read back right below —
  # confirm against CrawPkgsSpark.
  spark-submit --class mobvista.dmp.datasource.apptag.CrawPkgsSpark \
    --conf spark.default.parallelism=2000 \
    --conf spark.sql.shuffle.partitions=2000 \
    --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
    --conf spark.kryoserializer.buffer.max=512m \
    --conf spark.driver.maxResultSize=4g \
    --master yarn --deploy-mode cluster --name CrawPkgsSpark \
    --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 100 \
    "../${JAR}" -pkginstallpath "${PACKAGE_INSTALL_PATH}" -coalesce 20 \
    -yesday "${YESTERDAY}" || exit 255

  # The glob is quoted so that hadoop expands it on HDFS rather than the
  # local shell expanding it against the local filesystem.
  hadoop fs -text "${PACKAGE_INSTALL_PATH}/*" >"${out_file}" || exit 255

  hadoop fs -put "${out_file}" "$PACKAGE_PATH" || exit 255
}

# Publish the packages found in the bundle/package mapping as iOS packages.
#
# Reads every file of the mapping partition, keeps the second tab-separated
# column, tags each row with "ios", writes the result to ./bundle.data and
# uploads that file to PACKAGE_PATH.
#
# Globals read: BUNDLE_PACKAGE_MAPPING_PATH, year, month, day, PACKAGE_PATH
# Returns:      255 if reading the mapping or the upload fails.
bundle_pkg_mapping() {
  local mapping_dir="$BUNDLE_PACKAGE_MAPPING_PATH/$year/$month/$day"

  check_await "${mapping_dir}/_SUCCESS"

  # pipefail (scoped to this subshell) surfaces a failing 'hadoop fs -text';
  # without it awk's exit status would hide the failure and an empty
  # bundle.data would be uploaded.
  (
    set -o pipefail
    hadoop fs -text "${mapping_dir}/*" | awk -F '\t' '{print $2"\tios"}'
  ) >bundle.data || return 255

  # Replace a copy left by an earlier run; -f tolerates a missing file.
  hadoop fs -rm -f "${PACKAGE_PATH}/bundle.data"
  hadoop fs -put bundle.data "${PACKAGE_PATH}" || return 255
}

# Make sure the HDFS output directory for this partition exists.
if ! hadoop fs -test -e "${PACKAGE_PATH}"; then
  hadoop fs -mkdir -p "${PACKAGE_PATH}" || exit 255
fi

# Remove files left over from a previous run. The glob is quoted so hadoop
# expands it on HDFS; -f keeps an already empty directory from being reported
# as an error.
hadoop fs -rm -f "${PACKAGE_PATH}/*"

# Run each collector function defined above in turn to gather package names;
# each one writes <name>.txt locally and uploads it. Stop at the first failure.
for cmd in install campaign; do
  if ! "${cmd}" "${cmd}.txt"; then
    exit 255
  fi
done

bundle_pkg_mapping || exit 255