From 911afe9ac6c7030fd6fd1f8c4d6d27b1e140cf62 Mon Sep 17 00:00:00 2001 From: WangJinfeng <wjf20110627@163.com> Date: Thu, 23 Sep 2021 18:26:53 +0800 Subject: [PATCH] remove --files、--jars, upgrade to spark 3.1.1 --- azkaban/3s/3s_install_device_tag.sh | 2 -- azkaban/3s/3s_install_device_tag_daily.sh | 2 -- azkaban/3s/3s_install_device_tag_v2.sh | 2 -- azkaban/3s/3s_tracking_install_total_orc.sh | 2 -- azkaban/3s_postback/3s_postback_daily.sh | 1 - azkaban/3s_postback/3s_postback_device_tag.sh | 2 -- azkaban/3s_postback/3s_postback_device_tag_daily.sh | 2 -- azkaban/3s_postback/3s_postback_device_tag_v2.sh | 2 -- azkaban/3s_postback/3s_postback_install_total_orc.sh | 1 - azkaban/ad_server_campaign/merge_campaign_list.sh | 1 - azkaban/adn/device/adn_org_etl_hours_frankfurt.sh | 1 - azkaban/adn/device/adn_org_etl_hours_seoul.sh | 1 - azkaban/adn/device/adn_org_etl_hours_singapore.sh | 1 - azkaban/adn/device/adn_org_etl_hours_virginia.sh | 1 - azkaban/adn/package/adn_install_device_tag.sh | 2 -- azkaban/adn/package/adn_install_device_tag_daily.sh | 2 -- azkaban/adn/package/adn_install_device_tag_v2.sh | 2 -- azkaban/adn/package/adn_install_total_orc.sh | 2 -- azkaban/adn/package/adn_install_total_v1.sh | 1 - azkaban/adn/package/adn_install_total_v2.sh | 2 -- azkaban/adn/package/adn_request_device_tag.sh | 2 -- azkaban/adn/package/adn_request_device_tag_daily.sh | 2 -- azkaban/adn/package/adn_request_device_tag_v2.sh | 2 -- azkaban/adn/package/adn_request_install_total_orc.sh | 1 - azkaban/adn/package/adn_request_other_daily.sh | 1 - azkaban/adn/package/adn_request_other_device_tag.sh | 2 -- azkaban/adn/package/adn_request_other_device_tag_daily.sh | 2 -- azkaban/adn/package/adn_request_other_device_tag_v2.sh | 2 -- azkaban/adn/package/adn_request_other_install.sh | 1 - azkaban/adn/package/adn_request_other_install_total_orc.sh | 2 -- azkaban/adn/package/adn_request_other_install_v1.sh | 1 - azkaban/adn/package/adn_request_other_install_v2.sh | 2 -- azkaban/adn/package/adn_request_pkg_total_v1.sh | 1 - azkaban/adn/package/adn_request_pkg_total_v2.sh | 2 -- azkaban/adn/package/adn_request_unmatch_install_total_orc.sh | 2 -- azkaban/adn_adx/adn_tencent_adx_device_tag.sh | 1 - azkaban/adn_adx/adn_tencent_adx_package.sh | 1 - azkaban/adn_sdk/adn_sdk_daily.sh | 2 -- azkaban/adn_sdk/adn_sdk_device_tag.sh | 2 -- azkaban/adn_sdk/adn_sdk_device_tag_daily.sh | 2 -- azkaban/adn_sdk/adn_sdk_device_tag_v2.sh | 2 -- azkaban/adn_sdk/adn_sdk_install_total_orc.sh | 2 -- azkaban/adn_sdk/adn_sdk_install_v1.sh | 1 - azkaban/adn_sdk/adn_sdk_install_v2.sh | 2 -- azkaban/adn_sdk/adn_sdk_v2_device_tag.sh | 2 -- azkaban/adn_sdk/adn_sdk_v2_device_tag_v2.sh | 2 -- azkaban/adn_sdk/adn_sdk_v2_install_total_orc.sh | 2 -- azkaban/adn_sdk/adn_sdk_v2_install_v1.sh | 1 - azkaban/adn_sdk/adn_sdk_v2_install_v2.sh | 2 -- azkaban/age/get_dsp_all.sh | 2 -- azkaban/age/get_ga_all.sh | 2 -- azkaban/age/merge_install_age.sh | 1 - azkaban/ali/TO/TO_daily.sh | 1 - azkaban/ali/ali_ck.sh | 1 - azkaban/ali/ali_daily.sh | 1 - azkaban/ali/ali_device_tag.sh | 2 -- azkaban/ali/ali_device_tag_daily.sh | 1 - azkaban/ali/ali_device_tag_v2.sh | 2 -- azkaban/ali/ali_install_list_v2.sh | 2 -- azkaban/ali/ali_install_total_orc.sh | 2 -- azkaban/ali/ali_userinfo_activation_daily_all_job/ali_ios_userinfo_activation_daily.sh | 1 - azkaban/ali/ali_userinfo_activation_daily_all_job/ali_oaid_userinfo_activation_daily.sh | 1 - azkaban/ali/ali_userinfo_activation_daily_all_job/ali_userinfo_activation_daily.sh | 1 - azkaban/ali/ali_userinfo_postback_activation_daily.sh | 1 - azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_etl_postback_daily.sh | 1 - azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_extract_h_18_from_dsp_req.sh | 1 - azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_insert_other_data_to_dmp.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_02.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_03.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_04.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_daily.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_02.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_03.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_04.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_02.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_03.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_04.sh | 2 -- azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_02.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_03.sh | 1 - azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_04.sh | 1 - azkaban/ali/btop/btop_daily.sh | 1 - azkaban/ali/cainixihuan/cainixihuan01.sh | 1 - azkaban/ali/cainixihuan/cainixihuan02.sh | 1 - azkaban/ali/cainixihuan/cainixihuan03.sh | 1 - azkaban/ali/cainixihuan/cainixihuan04.sh | 1 - azkaban/ali/cainixihuan/cainixihuan05.sh | 1 - azkaban/ali/etl_dealid_hour.sh | 1 - azkaban/ali/etl_lazada_data_daily.sh | 2 -- azkaban/ali/etl_lazada_ios_data_daily.sh | 1 - azkaban/ali/other_single_jobs/etl_com_tencent_news_daily.sh | 1 - azkaban/ali/reyun/reyun_daily.sh | 1 - azkaban/ali/reyun/reyun_label_baijiu.sh | 2 -- azkaban/ali/reyun/reyun_label_test.sh | 1 - azkaban/ali/uc_lahuo/uc_imei_lahuo_ck.sh | 1 - azkaban/ali/uc_lahuo/uc_lahuo_daily.sh | 1 - azkaban/ali/uc_lahuo/uc_lahuo_data_to_dmp.sh | 1 - azkaban/ali/uc_lahuo/uc_lahuo_df.sh | 2 -- azkaban/ali/uc_lahuo/uc_oaid_lahuo_ck.sh | 1 - azkaban/ali/uc_lahuo/uc_other_data_to_dmp.sh | 1 - azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh | 1 - azkaban/ali/uc_lahuo_to_guangdiantong/ali_extract_h_32_from_dsp_req.sh | 1 - azkaban/ali/youku_laxin/youku_imei_laxin_ck.sh | 1 - azkaban/ali/youku_laxin/youku_laxin_daily.sh | 1 - azkaban/ali/youku_laxin/youku_laxin_data_to_dmp.sh | 2 -- azkaban/ali/youku_laxin/youku_laxin_df.sh | 2 -- azkaban/ali/youku_laxin/youku_oaid_laxin_ck.sh | 1 - azkaban/app_info/app_info_adr_3s.sh | 2 +- azkaban/app_info/app_info_ios_3s.sh | 2 +- azkaban/app_info/collect_package_name.sh | 1 - azkaban/app_info/etl_app_info_adr.sh | 4 +--- azkaban/app_info/etl_app_info_ios.sh | 4 +--- azkaban/app_info/import_campaign_tags.sh | 1 - azkaban/app_info/import_package_tags.sh | 1 - azkaban/app_info/package_black_list.sh | 1 - azkaban/app_tag/app_tag.sh | 1 - azkaban/appsflyer/appsflyer_total.sh | 1 - azkaban/bigmedia_domestic/bigmedia_domestic_launch_total.sh | 1 - azkaban/bytedance/bytedance_device_tag.sh | 2 -- azkaban/bytedance/bytedance_device_tag_daily.sh | 2 -- azkaban/bytedance/bytedance_device_tag_v2.sh | 2 -- azkaban/bytedance/bytedance_install_total_orc.sh | 2 -- azkaban/clever/adn_clever_device_tag.sh | 2 -- azkaban/clever/adn_clever_device_tag_daily.sh | 2 -- azkaban/clever/adn_clever_device_tag_v2.sh | 2 -- azkaban/clever/adn_clever_install_total_orc.sh | 2 -- azkaban/clever/adn_clever_install_v2.sh | 2 -- azkaban/datatory/datatory.sh | 2 -- azkaban/datatory/tracking/3s_tracking_event_daily.sh | 2 -- azkaban/datatory/tracking/3s_trackingnew_install_daily.sh | 2 -- azkaban/datatory/tracking/adn_tracking_ck.sh | 1 - azkaban/datatory/tracking/adn_trackingnew_event_daily.sh | 2 -- azkaban/datatory/tracking/adn_trackingnew_install_daily.sh | 2 -- azkaban/datatory/tracking/adn_trackingnew_merge_daily.sh | 2 -- azkaban/datatory/tracking/tracking_ck.sh | 1 - azkaban/datatory/tracking/trackingnew_merge_daily.sh | 2 -- azkaban/datatory/user_info/user_info_ck.sh | 1 - azkaban/dm/dm_active_tag_month.sh | 2 -- azkaban/dm/dm_active_tag_week.sh | 2 -- azkaban/dm/dm_interest_tag_all_v2.sh | 2 -- azkaban/dm/dm_interest_tag_all_v3.sh | 2 -- azkaban/dm/dmp_device_id_md5.sh | 1 - azkaban/dm/dmp_device_interest.sh | 2 -- azkaban/dm/dmp_device_tag_daily.sh | 1 - azkaban/dm/dmp_install_list_common.sh | 1 - azkaban/dm/dmp_install_list_daily.sh | 1 - azkaban/dm/dmp_install_list_daily_v2.sh | 1 - azkaban/dm/dmp_install_list_merge.sh | 1 - azkaban/dm/fix_dmp_install_list.sh | 1 - azkaban/dm/fix_install_list_ruid.sh | 1 - azkaban/dm/install_list_other_v2.sh | 2 -- azkaban/dm/install_other_device_tag.sh | 2 -- azkaban/dm/other_device_tag_v2.sh | 2 -- azkaban/dm/other_install_total_orc.sh | 2 -- azkaban/dm/pseudo_package_to_other_business/Age_Package_Names.sh | 1 - azkaban/dm/pseudo_package_to_other_business/Canglan_Package_Names.sh | 1 - azkaban/dm/pseudo_package_to_other_business/Three_Kingdoms_Game.sh | 1 - azkaban/dm/pseudo_package_to_other_business/adx_packagename_synchronize.sh | 1 - azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv.sh | 1 - azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv_gaid.sh | 1 - azkaban/dm/pseudo_package_to_other_business/cn_good_channel.sh | 1 - azkaban/dm/pseudo_package_to_other_business/com_eg_android_AlipayGphone_reyun.sh | 1 - azkaban/dm/pseudo_package_to_other_business/phone_wax_nobid_to_dmp.sh | 1 - azkaban/dm/pseudo_package_to_other_business/reyun_lahuo_list.sh | 1 - azkaban/dm/pseudo_package_to_other_business/rtdmp_normal.sh | 1 - azkaban/dm/pseudo_package_to_other_business/rtdmp_tmp_id1142110895.sh | 1 - azkaban/dm/pseudo_package_to_other_business/shinny.sh | 1 - azkaban/dmp_env.sh | 6 ------ azkaban/dmp_event_tag/dmp_event_tag.sh | 2 -- azkaban/dmp_event_tag/dmp_event_tag_pre.sh | 2 -- azkaban/dmp_event_tag/event_3s_postback_tag.sh | 1 - azkaban/dmp_event_tag/event_3s_tag.sh | 1 - azkaban/dmp_event_tag/event_ga_tag.sh | 1 - azkaban/dmp_event_tag/event_ss_tag.sh | 1 - azkaban/dsp/dsp_dc_interest.sh | 1 - azkaban/dsp/dsp_device_mapping.sh | 1 - azkaban/dsp/dsp_etl_daily_ext.sh | 1 - azkaban/dsp/dsp_org_etl_daily.sh | 1 - azkaban/dsp/dsp_org_etl_hours.sh | 1 - azkaban/dsp/dsp_req_device_tag.sh | 1 - azkaban/dsp/dsp_req_device_tag_daily.sh | 2 -- azkaban/dsp/dsp_req_device_tag_v2.sh | 2 -- azkaban/dsp/dsp_req_install_total_orc.sh | 1 - azkaban/dsp/dsp_req_pkg_total_v1.sh | 1 - azkaban/dsp/dsp_req_pkg_total_v2.sh | 2 -- azkaban/dsp/dsp_req_profile_total.sh | 2 -- azkaban/dsp/dsp_req_unmatch_install_total_orc.sh | 2 -- azkaban/dsp/impre_info_compl/log_adn_dsp_click_impression_combine.sh | 1 - azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_combine.sh | 1 - azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_cn.sh | 1 - azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_tk.sh | 1 - azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_vg.sh | 1 - azkaban/dsp/log_adn_dsp_impression_hour_full.sh | 1 - azkaban/dsp/tmp_extract_data_from_dsp_req.sh | 1 - azkaban/event_tag/Ga_purchase_event.sh | 2 -- azkaban/event_tag/behavior_thirdparty_datasource_manual_daily.sh | 1 - azkaban/event_tag/behavior_thirdparty_datasource_total.sh | 1 - azkaban/event_tag/event_tag.sh | 1 - azkaban/event_tag/event_tag_source.sh | 2 +- azkaban/facebook/facebook_daily.sh | 2 -- azkaban/facebook/facebook_device_tag.sh | 2 -- azkaban/facebook/facebook_device_tag_daily.sh | 2 -- azkaban/facebook/facebook_device_tag_v2.sh | 2 -- azkaban/facebook/facebook_install_total_orc.sh | 2 -- azkaban/facebook/facebook_total.sh | 1 - azkaban/fmp/fmp_insight.sh | 1 - azkaban/ga/ga_device_tag_v2.sh | 2 -- azkaban/ga/ga_install_device_tag.sh | 2 -- azkaban/ga/ga_install_device_tag_daily.sh | 2 -- azkaban/ga/ga_install_total_orc.sh | 2 -- azkaban/ga/ga_install_total_v2.sh | 2 -- azkaban/ga/ga_prepare.sh | 2 -- azkaban/gender/gender_thirdparty_datasource_manual_daily.sh | 1 - azkaban/gender/gender_thirdparty_datasource_total.sh | 1 - azkaban/gender/merge_device_gender.sh | 2 -- azkaban/gender/merge_install_gender.sh | 1 - azkaban/gender/merge_install_gender_v2.sh | 2 -- azkaban/install_ruid/etl_ruid_mapping.sh | 1 - azkaban/iqiyi/foractivation_qiyi_oppo.sh | 1 - azkaban/iqiyi/iqiyi_ck.sh | 1 - azkaban/iqiyi/iqiyi_daily.sh | 1 - azkaban/iqiyi/iqiyi_install_total_orc.sh | 2 -- azkaban/iqiyi/iqiyi_lahuo_ck.sh | 1 - azkaban/iqiyi/iqiyi_lahuo_daily.sh | 1 - azkaban/iqiyi/iqiyi_lahuo_df.sh | 1 - azkaban/iqiyi/iqiyi_tmp_daily_data_to_dmp.sh | 2 -- azkaban/joypac/joypac_device_tag.sh | 2 -- azkaban/joypac/joypac_device_tag_daily.sh | 2 -- azkaban/joypac/joypac_device_tag_v2.sh | 2 -- azkaban/joypac/joypac_install_total_orc.sh | 2 -- azkaban/joypac/joypac_result_all.sh | 2 -- azkaban/joypac/joypac_result_daily.sh | 2 -- azkaban/joypac/joypac_result_etl.sh | 1 - azkaban/joypac/joypac_user_info.sh | 2 -- azkaban/joypac/joypac_user_info_cassandra.sh | 2 -- azkaban/mp/mp_request_daily.sh | 1 - azkaban/mp/mp_request_device_tag.sh | 2 -- azkaban/mp/mp_request_device_tag_daily.sh | 2 -- azkaban/mp/mp_request_device_tag_v2.sh | 2 -- azkaban/mp/mp_request_install_list_v2.sh | 2 -- azkaban/mp/mp_request_install_total_orc.sh | 2 -- azkaban/mparticle/mparticle_install_total_orc.sh | 2 -- azkaban/output/reyun/user_info.sh | 4 ++-- azkaban/package/get_package.sh | 2 -- azkaban/package/merge_pkg_tag.sh | 2 -- azkaban/package/package_mapping.sh | 2 -- azkaban/realtime/cassandra_sink.sh | 2 -- azkaban/realtime/dm_realtime_service.sh | 1 - azkaban/realtime/dm_realtime_service_region.sh | 1 - azkaban/realtime/dmp_protal_job.sh | 1 - azkaban/realtime_v2/adn_etl_hour.sh | 1 - azkaban/realtime_v2/dsp_etl_hour.sh | 1 - azkaban/realtime_v2/etl_hour_ck.sh | 1 - azkaban/realtime_v2/realtime_hour_cassandra_sink.sh | 1 - azkaban/retarget_dealerid/dsp_dealer_daily.sh | 1 - azkaban/retarget_dealerid/dsp_dealer_device_tag.sh | 2 -- azkaban/rtdmp/device_info_calc.sh | 1 - azkaban/rtdmp/device_region_calc.sh | 2 -- azkaban/rtdmp/device_region_merge.sh | 2 -- azkaban/rtdmp/device_region_result.sh | 2 -- azkaban/rtdmp/device_region_write.sh | 1 - azkaban/rtdmp/lazada/etl_job.sh | 1 - azkaban/rtdmp/lazada/lazada_rtdmp.sh | 1 - azkaban/rtdmp/lazada/merge_install.sh | 1 - azkaban/rtdmp/rtdmp_as.sh | 1 - azkaban/rtdmp/rtdmp_merge.sh | 2 -- azkaban/rtdmp/rtdmp_repair.sh | 1 - azkaban/rtdmp/rtdmp_request.sh | 2 -- azkaban/rtdmp/rtdmp_request_iqiyi_adx.sh | 2 -- azkaban/setting/appid_package.sh | 2 -- azkaban/statistics/dm_device_tag_statistics.sh | 1 - azkaban/toutiao/dm_toutiao_launch_total.sh | 1 - azkaban/toutiao/toutiao_dmp_device_tag.sh | 2 -- azkaban/toutiao/toutiao_launch_device_tag.sh | 1 - azkaban/userinfo/ods_dmp_user_info_all.sh | 1 - azkaban/userinfo/ods_dmp_user_info_all_v2.sh | 1 - azkaban/userinfo/ods_dmp_user_info_daily.sh | 1 - azkaban/userinfo/ods_dmp_user_info_daily_v2.sh | 2 -- src/main/scala/mobvista/dmp/output/reyun/Constant.scala | 2 +- 282 files changed, 8 insertions(+), 419 deletions(-) diff --git a/azkaban/3s/3s_install_device_tag.sh b/azkaban/3s/3s_install_device_tag.sh index a19513f..6c47c9a 100644 --- a/azkaban/3s/3s_install_device_tag.sh +++ b/azkaban/3s/3s_install_device_tag.sh @@ -32,8 +32,6 @@ hadoop fs -rmr $store_output_path spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=20 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 4 \ ../${JAR} \ -date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20 diff --git a/azkaban/3s/3s_install_device_tag_daily.sh b/azkaban/3s/3s_install_device_tag_daily.sh index 9e6f805..3e8d9f5 100644 --- a/azkaban/3s/3s_install_device_tag_daily.sh +++ b/azkaban/3s/3s_install_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.tracking_3s.TrackingTagDaily \ --conf spark.sql.shuffle.partitions=5 \ --conf spark.default.parallelism=5 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 3 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2 diff --git a/azkaban/3s/3s_install_device_tag_v2.sh b/azkaban/3s/3s_install_device_tag_v2.sh index 732e501..762afcd 100644 --- a/azkaban/3s/3s_install_device_tag_v2.sh +++ b/azkaban/3s/3s_install_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=8 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4 diff --git a/azkaban/3s/3s_tracking_install_total_orc.sh b/azkaban/3s/3s_tracking_install_total_orc.sh index f918b25..fafe34b 100644 --- a/azkaban/3s/3s_tracking_install_total_orc.sh +++ b/azkaban/3s/3s_tracking_install_total_orc.sh @@ -38,8 +38,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=20 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 10 diff --git a/azkaban/3s_postback/3s_postback_daily.sh b/azkaban/3s_postback/3s_postback_daily.sh index 8cc609c..c23a1a4 100644 --- a/azkaban/3s_postback/3s_postback_daily.sh +++ b/azkaban/3s_postback/3s_postback_daily.sh @@ -40,7 +40,6 @@ spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackDaily \ --conf spark.sql.shuffle.partitions=1000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 100 \ ../${JAR} -output ${OUTPUT_PATH} -coalesce 100 \ diff --git a/azkaban/3s_postback/3s_postback_device_tag.sh b/azkaban/3s_postback/3s_postback_device_tag.sh index a4c7801..74d9a19 100644 --- a/azkaban/3s_postback/3s_postback_device_tag.sh +++ b/azkaban/3s_postback/3s_postback_device_tag.sh @@ -28,8 +28,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=1000 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 40 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1000 diff --git a/azkaban/3s_postback/3s_postback_device_tag_daily.sh b/azkaban/3s_postback/3s_postback_device_tag_daily.sh index 747d46a..11f853c 100644 --- a/azkaban/3s_postback/3s_postback_device_tag_daily.sh +++ b/azkaban/3s_postback/3s_postback_device_tag_daily.sh @@ -28,8 +28,6 @@ spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5 diff --git a/azkaban/3s_postback/3s_postback_device_tag_v2.sh b/azkaban/3s_postback/3s_postback_device_tag_v2.sh index c1a886f..73d40ca 100644 --- a/azkaban/3s_postback/3s_postback_device_tag_v2.sh +++ b/azkaban/3s_postback/3s_postback_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=100 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 40 diff --git a/azkaban/3s_postback/3s_postback_install_total_orc.sh b/azkaban/3s_postback/3s_postback_install_total_orc.sh index d79d354..dc5d9f2 100644 --- a/azkaban/3s_postback/3s_postback_install_total_orc.sh +++ b/azkaban/3s_postback/3s_postback_install_total_orc.sh @@ -38,7 +38,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 25 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200 diff --git a/azkaban/ad_server_campaign/merge_campaign_list.sh b/azkaban/ad_server_campaign/merge_campaign_list.sh index fc0cb6a..7cb6a4d 100644 --- a/azkaban/ad_server_campaign/merge_campaign_list.sh +++ b/azkaban/ad_server_campaign/merge_campaign_list.sh @@ -48,7 +48,6 @@ spark-submit --class mobvista.dmp.datasource.packagelist.MergeCampaignList \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 4 \ ../${JAR} -input_dmp_data_adn ${INPUT_DMP_DATA_ADN} -input_campaign_adn ${INPUT_CAMPAIGN_ADN} -coalesce 20 \ -output ${CAMPAIGN_TAG_OUTPUT} -today ${yester_year}-${yester_month}-${yester_day} || exit 1 diff --git a/azkaban/adn/device/adn_org_etl_hours_frankfurt.sh b/azkaban/adn/device/adn_org_etl_hours_frankfurt.sh index 9c6c03a..c8c6176 100644 --- a/azkaban/adn/device/adn_org_etl_hours_frankfurt.sh +++ b/azkaban/adn/device/adn_org_etl_hours_frankfurt.sh @@ -24,7 +24,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \ ../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region frankfurt || exit 1 diff --git a/azkaban/adn/device/adn_org_etl_hours_seoul.sh b/azkaban/adn/device/adn_org_etl_hours_seoul.sh index 69f5f62..8f792d7 100644 --- a/azkaban/adn/device/adn_org_etl_hours_seoul.sh +++ b/azkaban/adn/device/adn_org_etl_hours_seoul.sh @@ -24,7 +24,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \ ../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region seoul || exit 1 diff --git a/azkaban/adn/device/adn_org_etl_hours_singapore.sh b/azkaban/adn/device/adn_org_etl_hours_singapore.sh index e1f06b4..b572ea1 100644 --- a/azkaban/adn/device/adn_org_etl_hours_singapore.sh +++ b/azkaban/adn/device/adn_org_etl_hours_singapore.sh @@ -23,7 +23,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 50 \ ../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 400 -region singapore || exit 1 diff --git a/azkaban/adn/device/adn_org_etl_hours_virginia.sh b/azkaban/adn/device/adn_org_etl_hours_virginia.sh index 74980a0..2092d62 100644 --- a/azkaban/adn/device/adn_org_etl_hours_virginia.sh +++ b/azkaban/adn/device/adn_org_etl_hours_virginia.sh @@ -24,7 +24,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \ ../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region virginia || exit 1 diff --git a/azkaban/adn/package/adn_install_device_tag.sh b/azkaban/adn/package/adn_install_device_tag.sh index 4549021..459d6f9 100644 --- a/azkaban/adn/package/adn_install_device_tag.sh +++ b/azkaban/adn/package/adn_install_device_tag.sh @@ -32,8 +32,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=20 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../../${JAR} \ -date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20 diff --git a/azkaban/adn/package/adn_install_device_tag_daily.sh b/azkaban/adn/package/adn_install_device_tag_daily.sh index 6cd7e7e..77d1f54 100644 --- a/azkaban/adn/package/adn_install_device_tag_daily.sh +++ b/azkaban/adn/package/adn_install_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn.AdnInstallTagDaily \ --conf spark.sql.shuffle.partitions=50 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1 diff --git a/azkaban/adn/package/adn_install_device_tag_v2.sh b/azkaban/adn/package/adn_install_device_tag_v2.sh index 0f8d014..ae20260 100644 --- a/azkaban/adn/package/adn_install_device_tag_v2.sh +++ b/azkaban/adn/package/adn_install_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=8 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4 diff --git a/azkaban/adn/package/adn_install_total_orc.sh b/azkaban/adn/package/adn_install_total_orc.sh index f25dfb1..baac730 100644 --- a/azkaban/adn/package/adn_install_total_orc.sh +++ b/azkaban/adn/package/adn_install_total_orc.sh @@ -32,8 +32,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.sql.shuffle.partitions=400 \ --conf spark.default.parallelism=400 \ --conf spark.kryoserializer.buffer.max=256m \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 5 \ ../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 100 diff --git a/azkaban/adn/package/adn_install_total_v1.sh b/azkaban/adn/package/adn_install_total_v1.sh index 13d82dd..2fba1d0 100644 --- a/azkaban/adn/package/adn_install_total_v1.sh +++ b/azkaban/adn/package/adn_install_total_v1.sh @@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 40 \ ../../${JAR} \ -date ${date} -coalesce 200 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/adn/package/adn_install_total_v2.sh b/azkaban/adn/package/adn_install_total_v2.sh index 88f462c..4275fd2 100644 --- a/azkaban/adn/package/adn_install_total_v2.sh +++ b/azkaban/adn/package/adn_install_total_v2.sh @@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/adn/package/adn_request_device_tag.sh b/azkaban/adn/package/adn_request_device_tag.sh index 0738169..6839fea 100644 --- a/azkaban/adn/package/adn_request_device_tag.sh +++ b/azkaban/adn/package/adn_request_device_tag.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=4000 \ --conf spark.default.parallelism=4000 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 100 \ ../../${JAR} \ -date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 400 diff --git a/azkaban/adn/package/adn_request_device_tag_daily.sh b/azkaban/adn/package/adn_request_device_tag_daily.sh index cf7622a..f8287dd 100644 --- a/azkaban/adn/package/adn_request_device_tag_daily.sh +++ b/azkaban/adn/package/adn_request_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn.AdnRequestSdkTagDaily \ --conf spark.sql.shuffle.partitions=20 \ --conf spark.default.parallelism=5 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 2 \ ../../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20 diff --git a/azkaban/adn/package/adn_request_device_tag_v2.sh b/azkaban/adn/package/adn_request_device_tag_v2.sh index 60d51a1..c48f3bc 100644 --- a/azkaban/adn/package/adn_request_device_tag_v2.sh +++ b/azkaban/adn/package/adn_request_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=200 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 10 \ ../../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100 diff --git a/azkaban/adn/package/adn_request_install_total_orc.sh b/azkaban/adn/package/adn_request_install_total_orc.sh index b83858f..64d652b 100644 --- a/azkaban/adn/package/adn_request_install_total_orc.sh +++ b/azkaban/adn/package/adn_request_install_total_orc.sh @@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.default.parallelism=2000 \ --conf spark.kryoserializer.buffer.max=256m \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 60 \ ../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 1000 diff --git a/azkaban/adn/package/adn_request_other_daily.sh b/azkaban/adn/package/adn_request_other_daily.sh index 6a1dadc..d7de567 100644 --- a/azkaban/adn/package/adn_request_other_daily.sh +++ b/azkaban/adn/package/adn_request_other_daily.sh @@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.datasource.adn_request_other.EtlAdnRequestOthe --conf spark.storage.memoryFraction=0.4 \ --conf spark.driver.maxResultSize=5g \ --conf spark.executor.extraJavaOptions=-XX:+UseG1GC \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 5 --num-executors 40 \ ../../${JAR} -input "${INPUT_PATH}/*/*" -output $OUTPUT_PATH if [ $? -ne 0 ]; then diff --git a/azkaban/adn/package/adn_request_other_device_tag.sh b/azkaban/adn/package/adn_request_other_device_tag.sh index b71e065..a2435cc 100644 --- a/azkaban/adn/package/adn_request_other_device_tag.sh +++ b/azkaban/adn/package/adn_request_other_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.yarn.executor.memoryOverhead=3072 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 80 \ ../../${JAR} \ -date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2000 diff --git a/azkaban/adn/package/adn_request_other_device_tag_daily.sh b/azkaban/adn/package/adn_request_other_device_tag_daily.sh index 704b58e..26adc22 100644 --- a/azkaban/adn/package/adn_request_other_device_tag_daily.sh +++ b/azkaban/adn/package/adn_request_other_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn_request_other.AdnRequestOtherTa --conf spark.sql.shuffle.partitions=20 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/adn/package/adn_request_other_device_tag_v2.sh b/azkaban/adn/package/adn_request_other_device_tag_v2.sh index 3158428..40135c5 100644 --- a/azkaban/adn/package/adn_request_other_device_tag_v2.sh +++ b/azkaban/adn/package/adn_request_other_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=200 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ../../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100 diff --git a/azkaban/adn/package/adn_request_other_install.sh b/azkaban/adn/package/adn_request_other_install.sh index 7453ed9..57d5705 100644 --- a/azkaban/adn/package/adn_request_other_install.sh +++ b/azkaban/adn/package/adn_request_other_install.sh @@ -27,7 +27,6 @@ hadoop fs -rm -r "$OUTPUT_PATH" spark-submit --class mobvista.dmp.datasource.adn_request_other.AdnRequestOtherInstall \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 200 \ ../../${JAR} \ -input "${INPUT_PATH}" -output $OUTPUT_PATH -date $date -oldInput $OLD_INPUT_PATH -parallelism 2000 -coalesce 2000 diff --git a/azkaban/adn/package/adn_request_other_install_total_orc.sh b/azkaban/adn/package/adn_request_other_install_total_orc.sh index f136c6c..607e6be 100644 --- a/azkaban/adn/package/adn_request_other_install_total_orc.sh +++ b/azkaban/adn/package/adn_request_other_install_total_orc.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 400 diff --git a/azkaban/adn/package/adn_request_other_install_v1.sh b/azkaban/adn/package/adn_request_other_install_v1.sh index dce21b4..7f6b113 100644 --- a/azkaban/adn/package/adn_request_other_install_v1.sh +++ b/azkaban/adn/package/adn_request_other_install_v1.sh @@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../../${JAR} \ -date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/adn/package/adn_request_other_install_v2.sh b/azkaban/adn/package/adn_request_other_install_v2.sh index 1f54f45..ab5673a 100644 --- a/azkaban/adn/package/adn_request_other_install_v2.sh +++ b/azkaban/adn/package/adn_request_other_install_v2.sh @@ -38,8 +38,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ ../../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/adn/package/adn_request_pkg_total_v1.sh b/azkaban/adn/package/adn_request_pkg_total_v1.sh index 353882d..8d03b66 100644 --- a/azkaban/adn/package/adn_request_pkg_total_v1.sh +++ b/azkaban/adn/package/adn_request_pkg_total_v1.sh @@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../../${JAR} \ -date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/adn/package/adn_request_pkg_total_v2.sh b/azkaban/adn/package/adn_request_pkg_total_v2.sh index 1decd57..12af3aa 100644 --- a/azkaban/adn/package/adn_request_pkg_total_v2.sh +++ b/azkaban/adn/package/adn_request_pkg_total_v2.sh @@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 2 --num-executors 40 \ ../../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/adn/package/adn_request_unmatch_install_total_orc.sh b/azkaban/adn/package/adn_request_unmatch_install_total_orc.sh index 9c4fb96..aa1b6df 100644 --- a/azkaban/adn/package/adn_request_unmatch_install_total_orc.sh +++ b/azkaban/adn/package/adn_request_unmatch_install_total_orc.sh @@ -30,8 +30,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC " \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors 50 \ ../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200 diff --git a/azkaban/adn_adx/adn_tencent_adx_device_tag.sh b/azkaban/adn_adx/adn_tencent_adx_device_tag.sh index 92315ab..85d751f 100644 --- a/azkaban/adn_adx/adn_tencent_adx_device_tag.sh +++ b/azkaban/adn_adx/adn_tencent_adx_device_tag.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.adn_adx.AdnAdxDeviceTag \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name AdnAdxDeviceTag --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 32 \ ../${JAR} -outputadxdevtag ${OUTPUT_ADN_ADX_DEVICE_TAG_PATH} \ diff --git a/azkaban/adn_adx/adn_tencent_adx_package.sh b/azkaban/adn_adx/adn_tencent_adx_package.sh index 279a07c..ccafa93 100644 --- a/azkaban/adn_adx/adn_tencent_adx_package.sh +++ b/azkaban/adn_adx/adn_tencent_adx_package.sh @@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.adn_adx.AdnTecentAdxDataMidWay \ --conf spark.default.parallelism=1000 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar,s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/json-serde-1.3.7-jar-with-dependencies.jar \ --master yarn --deploy-mode cluster --name AdnTecentAdxDataMidWay --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../${JAR} -outputadxtmp ${OUTPUT_ODS_ADX_TMP_PATH} -dimadxpkg ${OUTPUT_DIM_ADN_ADX_PKG_PATH} \ diff --git a/azkaban/adn_sdk/adn_sdk_daily.sh b/azkaban/adn_sdk/adn_sdk_daily.sh index 0f247ee..1f09ecc 100644 --- a/azkaban/adn_sdk/adn_sdk_daily.sh +++ b/azkaban/adn_sdk/adn_sdk_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn_sdk.AdnSdkDaily \ --conf spark.driver.maxResultSize=8g \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.app.coalesce=60000 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --name adn_sdk_daily --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 200 \ ../${JAR} diff --git a/azkaban/adn_sdk/adn_sdk_device_tag.sh b/azkaban/adn_sdk/adn_sdk_device_tag.sh index f472133..8ac40b1 100644 --- a/azkaban/adn_sdk/adn_sdk_device_tag.sh +++ b/azkaban/adn_sdk/adn_sdk_device_tag.sh @@ -34,8 +34,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.yarn.executor.memoryOverhead=4096 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 200 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2000 diff --git a/azkaban/adn_sdk/adn_sdk_device_tag_daily.sh b/azkaban/adn_sdk/adn_sdk_device_tag_daily.sh index d9c63e1..74a8832 100644 --- a/azkaban/adn_sdk/adn_sdk_device_tag_daily.sh +++ b/azkaban/adn_sdk/adn_sdk_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn_sdk.AdnSdkTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/adn_sdk/adn_sdk_device_tag_v2.sh b/azkaban/adn_sdk/adn_sdk_device_tag_v2.sh index e4f425c..5d33216 100644 --- a/azkaban/adn_sdk/adn_sdk_device_tag_v2.sh +++ b/azkaban/adn_sdk/adn_sdk_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=1000 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100 diff --git a/azkaban/adn_sdk/adn_sdk_install_total_orc.sh b/azkaban/adn_sdk/adn_sdk_install_total_orc.sh index 88d3631..cd307a9 100644 --- a/azkaban/adn_sdk/adn_sdk_install_total_orc.sh +++ b/azkaban/adn_sdk/adn_sdk_install_total_orc.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=4000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 100 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 2000 diff --git a/azkaban/adn_sdk/adn_sdk_install_v1.sh b/azkaban/adn_sdk/adn_sdk_install_v1.sh index e6aeede..6eb428c 100644 --- a/azkaban/adn_sdk/adn_sdk_install_v1.sh +++ b/azkaban/adn_sdk/adn_sdk_install_v1.sh @@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 80 \ ../${JAR} \ -date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/adn_sdk/adn_sdk_install_v2.sh b/azkaban/adn_sdk/adn_sdk_install_v2.sh index f73b068..29e6b0d 100644 --- a/azkaban/adn_sdk/adn_sdk_install_v2.sh +++ b/azkaban/adn_sdk/adn_sdk_install_v2.sh @@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \ ../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/adn_sdk/adn_sdk_v2_device_tag.sh b/azkaban/adn_sdk/adn_sdk_v2_device_tag.sh index ed80739..838cccd 100644 --- a/azkaban/adn_sdk/adn_sdk_v2_device_tag.sh +++ b/azkaban/adn_sdk/adn_sdk_v2_device_tag.sh @@ -34,8 +34,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.yarn.executor.memoryOverhead=4096 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 150 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2000 diff --git a/azkaban/adn_sdk/adn_sdk_v2_device_tag_v2.sh b/azkaban/adn_sdk/adn_sdk_v2_device_tag_v2.sh index 0b2dcdf..30dcd40 100644 --- a/azkaban/adn_sdk/adn_sdk_v2_device_tag_v2.sh +++ b/azkaban/adn_sdk/adn_sdk_v2_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=100 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100 diff --git a/azkaban/adn_sdk/adn_sdk_v2_install_total_orc.sh b/azkaban/adn_sdk/adn_sdk_v2_install_total_orc.sh index f3a9ff8..a48fe30 100644 --- a/azkaban/adn_sdk/adn_sdk_v2_install_total_orc.sh +++ b/azkaban/adn_sdk/adn_sdk_v2_install_total_orc.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=500 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200 diff --git a/azkaban/adn_sdk/adn_sdk_v2_install_v1.sh b/azkaban/adn_sdk/adn_sdk_v2_install_v1.sh index c590438..d2d27e5 100644 --- a/azkaban/adn_sdk/adn_sdk_v2_install_v1.sh +++ b/azkaban/adn_sdk/adn_sdk_v2_install_v1.sh @@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 40 \ ../${JAR} \ -date ${date} -coalesce 200 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/adn_sdk/adn_sdk_v2_install_v2.sh b/azkaban/adn_sdk/adn_sdk_v2_install_v2.sh index 12f2ea8..b4dbacb 100644 --- a/azkaban/adn_sdk/adn_sdk_v2_install_v2.sh +++ b/azkaban/adn_sdk/adn_sdk_v2_install_v2.sh @@ -43,8 +43,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \ ../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/age/get_dsp_all.sh b/azkaban/age/get_dsp_all.sh index 2052c2a..fbde08d 100644 --- a/azkaban/age/get_dsp_all.sh +++ b/azkaban/age/get_dsp_all.sh @@ -36,8 +36,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.GetAgeGender \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.default.parallelism=2000 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 5 --num-executors 60 \ ../${JAR} -ageOutput ${AGE_OUTPUT_PATH} -genderOutput ${GENDER_OUTPUT_PATH} -date ${LOG_TIME} -business ${business} diff --git a/azkaban/age/get_ga_all.sh b/azkaban/age/get_ga_all.sh index f5dc7bc..b6a86c3 100644 --- a/azkaban/age/get_ga_all.sh +++ b/azkaban/age/get_ga_all.sh @@ -58,8 +58,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.GetAgeGender \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1.5 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -ageOutput ${AGE_OUTPUT_PATH} -genderOutput ${GENDER_OUTPUT_PATH} -date ${GA_TOTAL_DATE} -business ${business} diff --git a/azkaban/age/merge_install_age.sh b/azkaban/age/merge_install_age.sh index 99498a6..ec27ae4 100644 --- a/azkaban/age/merge_install_age.sh +++ b/azkaban/age/merge_install_age.sh @@ -52,7 +52,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.MergeInstallAge \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 60 \ ../${JAR} -date ${LOG_TIME} \ -ga_age_path ${GA_AGE_PATH} -dsp_age_path ${DSP_AGE_PATH} -age_output ${OUTPUT_PATH} -parallelism 2000 diff --git a/azkaban/ali/TO/TO_daily.sh b/azkaban/ali/TO/TO_daily.sh index 5cfb266..0f53f54 100644 --- a/azkaban/ali/TO/TO_daily.sh +++ b/azkaban/ali/TO/TO_daily.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.TO.TODaily \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 20 \ ../../${JAR} \ -output ${OUTPUT_PATH} -coalesce 200 -dt_dash_today ${dt_dash_today} diff --git a/azkaban/ali/ali_ck.sh b/azkaban/ali/ali_ck.sh index ccd6642..6c5701a 100644 --- a/azkaban/ali/ali_ck.sh +++ b/azkaban/ali/ali_ck.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.baichuan.BaiChuanJob \ --conf spark.default.parallelism=100 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors 20 \ ../${JAR} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/ali/ali_daily.sh b/azkaban/ali/ali_daily.sh index 2e4b6d4..42213f0 100644 --- a/azkaban/ali/ali_daily.sh +++ b/azkaban/ali/ali_daily.sh @@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.baichuan.AliDaily \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \ ../${JAR} -date ${LOG_TIME} -partNum 10 -output ${OUTPUT_PATH} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/ali/ali_device_tag.sh b/azkaban/ali/ali_device_tag.sh index c1e4d4f..0bd2e0b 100644 --- a/azkaban/ali/ali_device_tag.sh +++ b/azkaban/ali/ali_device_tag.sh @@ -36,8 +36,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=100 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 40 diff --git a/azkaban/ali/ali_device_tag_daily.sh b/azkaban/ali/ali_device_tag_daily.sh index 886ce46..32f918d 100644 --- a/azkaban/ali/ali_device_tag_daily.sh +++ b/azkaban/ali/ali_device_tag_daily.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.baichuan.AliTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5 diff --git a/azkaban/ali/ali_device_tag_v2.sh b/azkaban/ali/ali_device_tag_v2.sh index 2bf5392..5f6ca0e 100644 --- a/azkaban/ali/ali_device_tag_v2.sh +++ b/azkaban/ali/ali_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=8 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 8 diff --git a/azkaban/ali/ali_install_list_v2.sh b/azkaban/ali/ali_install_list_v2.sh index 4ff7744..36d77c4 100644 --- a/azkaban/ali/ali_install_list_v2.sh +++ b/azkaban/ali/ali_install_list_v2.sh @@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \ ../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/ali/ali_install_total_orc.sh b/azkaban/ali/ali_install_total_orc.sh index f284120..c17b4fa 100644 --- a/azkaban/ali/ali_install_total_orc.sh +++ b/azkaban/ali/ali_install_total_orc.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=400 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 100 diff --git a/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_ios_userinfo_activation_daily.sh b/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_ios_userinfo_activation_daily.sh index 4be491c..035ced9 100644 --- a/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_ios_userinfo_activation_daily.sh +++ b/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_ios_userinfo_activation_daily.sh @@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliIosActivitionDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 60 \ ../../${JAR} -output ${OUTPUT_PATH} -outputdaily ${ALI_OUTPUT_DAILY_PATH} -coalesce 500 \ -yesterday ${yesterday} -today ${dt_today} -dt_dash_today ${dt_dash_today} -dt_dash_rec14day ${dt_dash_rec14day} \ diff --git a/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_oaid_userinfo_activation_daily.sh b/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_oaid_userinfo_activation_daily.sh index b0bb4f3..3e30453 100644 --- a/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_oaid_userinfo_activation_daily.sh +++ b/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_oaid_userinfo_activation_daily.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliOaidActivitionDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 60 \ ../../${JAR} -output ${OUTPUT_PATH} -outputdaily ${ALI_OAID_OUTPUT_DAILY_PATH} -coalesce 500 \ -yesterday ${yesterday} -today ${dt_today} -dt_dash_today ${dt_dash_today} -dt_dash_rec14day ${dt_dash_rec14day} \ diff --git a/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_userinfo_activation_daily.sh b/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_userinfo_activation_daily.sh index f345228..27811cf 100644 --- a/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_userinfo_activation_daily.sh +++ b/azkaban/ali/ali_userinfo_activation_daily_all_job/ali_userinfo_activation_daily.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 60 \ ../../${JAR} -output ${OUTPUT_PATH} -outputdaily ${ALI_OUTPUT_DAILY_PATH} -coalesce 500 \ -yesterday ${yesterday} -today ${dt_today} -dt_dash_today ${dt_dash_today} -dt_dash_rec14day ${dt_dash_rec14day} \ diff --git a/azkaban/ali/ali_userinfo_postback_activation_daily.sh b/azkaban/ali/ali_userinfo_postback_activation_daily.sh index db1a5bb..caf8edd 100644 --- a/azkaban/ali/ali_userinfo_postback_activation_daily.sh +++ b/azkaban/ali/ali_userinfo_postback_activation_daily.sh @@ -84,7 +84,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionPostBackDail --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 60 \ ../${JAR} -output ${OUTPUT_PATH} -iosoutput ${ALI_IOS_OUTPUT} -oaidoutput ${ALI_OAID_OUTPUT} -coalesce 50 \ -today ${dt_today} -update_date ${dt_dash_today} \ diff --git a/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_etl_postback_daily.sh b/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_etl_postback_daily.sh index fb791a0..d12a150 100644 --- a/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_etl_postback_daily.sh +++ b/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_etl_postback_daily.sh @@ -66,7 +66,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionPostBackDail --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ --conf spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive=true \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 70 \ ../../${JAR} -output ${OUTPUT_PATH} -iosoutput ${ALI_IOS_OUTPUT} -oaidoutput ${ALI_OAID_OUTPUT} -coalesce 300 \ -today ${dt_today} -update_date ${dt_dash_today} -dt_taobao_postback_day ${dt_taobao_postback_day}\ diff --git a/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_extract_h_18_from_dsp_req.sh b/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_extract_h_18_from_dsp_req.sh index debcd7d..079f1c5 100644 --- a/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_extract_h_18_from_dsp_req.sh +++ b/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_extract_h_18_from_dsp_req.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlH18FromDmInstallListV2 \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 60 \ ../../${JAR} \ -h18_imei ${IMEI_H_18_GUANGDIANTONG_RES_PATH} -h18_imeimd5 ${IMEIMD5_H_18_GUANGDIANTONG_RES_PATH} \ diff --git a/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_insert_other_data_to_dmp.sh b/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_insert_other_data_to_dmp.sh index be18463..470e7e0 100644 --- a/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_insert_other_data_to_dmp.sh +++ b/azkaban/ali/ali_userinfo_postback_activation_daily_v2/ali_insert_other_data_to_dmp.sh @@ -43,7 +43,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlOtherDataFromPostBackDail --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 150 \ ../../${JAR} \ -output ${OUTPUT_PATH} \ diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck.sh index 21bce80..7e2732f 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_02.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_02.sh index e99ec86..b7d16ba 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_02.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_02.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_03.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_03.sh index 011243b..48fc411 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_03.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_03.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_04.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_04.sh index 7ac07e7..637d3cb 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_04.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_imei_lahuo_ck_04.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_daily.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_daily.sh index 1bf72ad..76d8416 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_daily.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_daily.sh @@ -37,7 +37,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 120 ../../${JAR} \ -imeioutput "${ALIPAY_IMEIMD5_OUTPUT_PATH}" \ -today ${dt_today} -last_req_day ${last_req_day} -dt_after_one_day ${dt_after_one_day}\ diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp.sh index f3e59eb..e7f75b1 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \ -output01 ${OUTPUT01} -output02 ${OUTPUT02} @@ -63,7 +62,6 @@ fi # --conf spark.sql.broadcastTimeout=1200 \ # --conf spark.yarn.executor.memoryOverhead=4096 \ # --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ -# --files ${HIVE_SITE_PATH} \ # --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ # ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \ # -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_02.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_02.sh index d5be502..4bb0b0e 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_02.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_02.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \ -output01 ${OUTPUT01} -output02 ${OUTPUT02} @@ -63,7 +62,6 @@ fi # --conf spark.sql.broadcastTimeout=1200 \ # --conf spark.yarn.executor.memoryOverhead=4096 \ # --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ -# --files ${HIVE_SITE_PATH} \ # --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ # ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \ # -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_03.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_03.sh index 78cefe6..cfe658c 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_03.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_03.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \ -output01 ${OUTPUT01} -output02 ${OUTPUT02} @@ -63,7 +62,6 @@ fi # --conf spark.sql.broadcastTimeout=1200 \ # --conf spark.yarn.executor.memoryOverhead=4096 \ # --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ -# --files ${HIVE_SITE_PATH} \ # --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ # ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \ # -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_04.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_04.sh index 04b01ea..e0f7765 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_04.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_data_to_dmp_04.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \ -output01 ${OUTPUT01} -output02 ${OUTPUT02} @@ -63,7 +62,6 @@ fi # --conf spark.sql.broadcastTimeout=1200 \ # --conf spark.yarn.executor.memoryOverhead=4096 \ # --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ -# --files ${HIVE_SITE_PATH} \ # --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ # ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \ # -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df.sh index 3f0a2d9..ca6ce5d 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df.sh @@ -20,8 +20,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \ ../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_02.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_02.sh index ef63fc4..6643f33 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_02.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_02.sh @@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \ ../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_03.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_03.sh index 9cf2b46..06b34b1 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_03.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_03.sh @@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \ ../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_04.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_04.sh index 4fb026e..38c7fc4 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_04.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_lahuo_df_04.sh @@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \ ../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour} diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp.sh index a22c3ad..06b931c 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \ ../../${JAR} \ -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \ diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_02.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_02.sh index 12993e5..76a49c0 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_02.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_02.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \ ../../${JAR} \ -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \ diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_03.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_03.sh index a731f1d..b902019 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_03.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_03.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \ ../../${JAR} \ -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \ diff --git a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_04.sh b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_04.sh index 04a9724..dd84014 100644 --- a/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_04.sh +++ b/azkaban/ali/alipay_lahuo_laxin/alipay_other_data_to_dmp_04.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \ ../../${JAR} \ -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \ diff --git a/azkaban/ali/btop/btop_daily.sh b/azkaban/ali/btop/btop_daily.sh index 0caaa63..5fe2ca5 100644 --- a/azkaban/ali/btop/btop_daily.sh +++ b/azkaban/ali/btop/btop_daily.sh @@ -45,7 +45,6 @@ spark-submit --class mobvista.dmp.datasource.btop.BtopDaily \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 60 \ ../../${JAR} \ -output ${OUTPUT_PATH} -coalesce 200 -dt_today ${dt_today} -dt_dash_tow_days ${dt_dash_tow_days} -seven_days_ago ${seven_days_ago} -fifteen_days_ago ${fifteen_days_ago} \ diff --git a/azkaban/ali/cainixihuan/cainixihuan01.sh b/azkaban/ali/cainixihuan/cainixihuan01.sh index 3503a49..29a1fe0 100644 --- a/azkaban/ali/cainixihuan/cainixihuan01.sh +++ b/azkaban/ali/cainixihuan/cainixihuan01.sh @@ -22,7 +22,6 @@ spark-submit \ --class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \ --conf spark.network.timeout=720s \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --name cainixihuan_AppInfoCrawlerSpark \ diff --git a/azkaban/ali/cainixihuan/cainixihuan02.sh b/azkaban/ali/cainixihuan/cainixihuan02.sh index f109a3c..bcc5cde 100644 --- a/azkaban/ali/cainixihuan/cainixihuan02.sh +++ b/azkaban/ali/cainixihuan/cainixihuan02.sh @@ -25,7 +25,6 @@ spark-submit \ --class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \ --conf spark.network.timeout=720s \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --name cainixihuan_AppInfoCrawlerSpark \ diff --git a/azkaban/ali/cainixihuan/cainixihuan03.sh b/azkaban/ali/cainixihuan/cainixihuan03.sh index d0eefd4..dacf010 100644 --- a/azkaban/ali/cainixihuan/cainixihuan03.sh +++ b/azkaban/ali/cainixihuan/cainixihuan03.sh @@ -24,7 +24,6 @@ spark-submit \ --class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \ --conf spark.network.timeout=720s \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --name cainixihuan_AppInfoCrawlerSpark \ diff --git a/azkaban/ali/cainixihuan/cainixihuan04.sh b/azkaban/ali/cainixihuan/cainixihuan04.sh index 3a38e61..b96fd42 100644 --- a/azkaban/ali/cainixihuan/cainixihuan04.sh +++ b/azkaban/ali/cainixihuan/cainixihuan04.sh @@ -24,7 +24,6 @@ spark-submit \ --class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \ --conf spark.network.timeout=720s \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --name cainixihuan_AppInfoCrawlerSpark \ diff --git a/azkaban/ali/cainixihuan/cainixihuan05.sh b/azkaban/ali/cainixihuan/cainixihuan05.sh index cc31125..d095edb 100644 --- a/azkaban/ali/cainixihuan/cainixihuan05.sh +++ b/azkaban/ali/cainixihuan/cainixihuan05.sh @@ -24,7 +24,6 @@ spark-submit \ --class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \ --conf spark.network.timeout=720s \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --name cainixihuan_AppInfoCrawlerSpark \ diff --git a/azkaban/ali/etl_dealid_hour.sh b/azkaban/ali/etl_dealid_hour.sh index ef4cbd8..7f95cee 100644 --- a/azkaban/ali/etl_dealid_hour.sh +++ b/azkaban/ali/etl_dealid_hour.sh @@ -36,7 +36,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlDealidDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 6 --num-executors 30 \ ../${JAR} -dt_dash_today ${dt_dash_today} \ -oppooutput ${OPPO_OUTPUT} \ diff --git a/azkaban/ali/etl_lazada_data_daily.sh b/azkaban/ali/etl_lazada_data_daily.sh index 6182e1d..5e08927 100644 --- a/azkaban/ali/etl_lazada_data_daily.sh +++ b/azkaban/ali/etl_lazada_data_daily.sh @@ -62,7 +62,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlLazadaActivitionDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 90 ../${JAR} \ -gaidoutput "${GAID_OUTPUT_PATH}" \ -today ${dt_today} -last_req_day ${last_req_day} \ @@ -107,7 +106,6 @@ fi # --conf spark.sql.broadcastTimeout=1200 \ # --conf spark.yarn.executor.memoryOverhead=4096 \ # --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ -# --files ${HIVE_SITE_PATH} \ # --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 70 ../${JAR} \ # -gaidoutput "${GAID_OUTPUT_PATH}" -gaidinput "${GAID_INPUT_PATH}" -newoutput "${NEW_OUTPUT_PATH}" \ # -today ${dt_today} -dt_30days_ago ${dt_30days_ago} diff --git a/azkaban/ali/etl_lazada_ios_data_daily.sh b/azkaban/ali/etl_lazada_ios_data_daily.sh index fb2ad87..662b3e9 100644 --- a/azkaban/ali/etl_lazada_ios_data_daily.sh +++ b/azkaban/ali/etl_lazada_ios_data_daily.sh @@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlLazadaIosActivitionDaily --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 90 ../${JAR} \ -today ${dt_today} -last_req_day ${last_req_day} \ -vn_idfaoutput "${VN_IDFA_OUTPUT_PATH}" -id_idfaoutput "${ID_IDFA_OUTPUT_PATH}" -th_idfaoutput "${TH_IDFA_OUTPUT_PATH}" \ diff --git a/azkaban/ali/other_single_jobs/etl_com_tencent_news_daily.sh b/azkaban/ali/other_single_jobs/etl_com_tencent_news_daily.sh index 92755cb..dee7d2e 100644 --- a/azkaban/ali/other_single_jobs/etl_com_tencent_news_daily.sh +++ b/azkaban/ali/other_single_jobs/etl_com_tencent_news_daily.sh @@ -17,7 +17,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlComTencentNewsDaily \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 120 \ ../../${JAR} \ -output ${OUTPUT_PATH} -coalesce 500 \ diff --git a/azkaban/ali/reyun/reyun_daily.sh b/azkaban/ali/reyun/reyun_daily.sh index a9aec39..a4ffbf5 100644 --- a/azkaban/ali/reyun/reyun_daily.sh +++ b/azkaban/ali/reyun/reyun_daily.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.reyun.ReyunDaily \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 60 \ ../../${JAR} \ -output ${OUTPUT_PATH} -coalesce 400 -dt_today ${dt_today} diff --git a/azkaban/ali/reyun/reyun_label_baijiu.sh b/azkaban/ali/reyun/reyun_label_baijiu.sh index bf6b715..e951049 100644 --- a/azkaban/ali/reyun/reyun_label_baijiu.sh +++ b/azkaban/ali/reyun/reyun_label_baijiu.sh @@ -24,8 +24,6 @@ spark-submit --class mobvista.dmp.datasource.reyun.ReyunLabelBaijiu \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 170 \ ../../${JAR} \ -output ${OUTPUT_PATH} -coalesce 680 -dt_today ${dt_today} diff --git a/azkaban/ali/reyun/reyun_label_test.sh b/azkaban/ali/reyun/reyun_label_test.sh index 8b434b7..9dd3add 100644 --- a/azkaban/ali/reyun/reyun_label_test.sh +++ b/azkaban/ali/reyun/reyun_label_test.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.reyun.ReyunLabelTest \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 40 \ ../../${JAR} \ -output ${OUTPUT_PATH} -coalesce 40 -dt_today ${dt_today} diff --git a/azkaban/ali/uc_lahuo/uc_imei_lahuo_ck.sh b/azkaban/ali/uc_lahuo/uc_imei_lahuo_ck.sh index 63f01cb..a108a12 100644 --- a/azkaban/ali/uc_lahuo/uc_imei_lahuo_ck.sh +++ b/azkaban/ali/uc_lahuo/uc_imei_lahuo_ck.sh @@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCImeiLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/ali/uc_lahuo/uc_lahuo_daily.sh b/azkaban/ali/uc_lahuo/uc_lahuo_daily.sh index 410ce2c..21604cc 100644 --- a/azkaban/ali/uc_lahuo/uc_lahuo_daily.sh +++ b/azkaban/ali/uc_lahuo/uc_lahuo_daily.sh @@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCLaHuoDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 80 ../../${JAR} \ -imeioutput "${UC_IMEIMD5_OUTPUT_PATH}" -oaidoutput "${UC_OAIDMD5_OUTPUT_PATH}" \ -today ${dt_today} -last_req_day ${last_req_day} diff --git a/azkaban/ali/uc_lahuo/uc_lahuo_data_to_dmp.sh b/azkaban/ali/uc_lahuo/uc_lahuo_data_to_dmp.sh index c7a38ef..457d98d 100644 --- a/azkaban/ali/uc_lahuo/uc_lahuo_data_to_dmp.sh +++ b/azkaban/ali/uc_lahuo/uc_lahuo_data_to_dmp.sh @@ -40,7 +40,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCTmpDataToDMP \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -imeiRequestInput ${UC_IMEIMD5_REQUEST_INPUT_PATH} -oaidRequestInput ${UC_OAIDMD5_REQUEST_INPUT_PATH} \ -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} -oaidResponseInput ${OAIDMD5_RESPONSE_INPUT_PATH} \ diff --git a/azkaban/ali/uc_lahuo/uc_lahuo_df.sh b/azkaban/ali/uc_lahuo/uc_lahuo_df.sh index faaa7b0..0051cdb 100644 --- a/azkaban/ali/uc_lahuo/uc_lahuo_df.sh +++ b/azkaban/ali/uc_lahuo/uc_lahuo_df.sh @@ -20,8 +20,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCLaHuoDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 10 \ ../../${JAR} -date ${LOG_TIME} -partNum 10 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -oaidOutput ${OAIDMD5_OUTPUT_PATH} -cluster 'cluster_1st' diff --git a/azkaban/ali/uc_lahuo/uc_oaid_lahuo_ck.sh b/azkaban/ali/uc_lahuo/uc_oaid_lahuo_ck.sh index f69ad8b..998c6fa 100644 --- a/azkaban/ali/uc_lahuo/uc_oaid_lahuo_ck.sh +++ b/azkaban/ali/uc_lahuo/uc_oaid_lahuo_ck.sh @@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCOaidLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -oaidInput ${OAIDMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/ali/uc_lahuo/uc_other_data_to_dmp.sh b/azkaban/ali/uc_lahuo/uc_other_data_to_dmp.sh index d4184a2..cf55f65 100644 --- a/azkaban/ali/uc_lahuo/uc_other_data_to_dmp.sh +++ b/azkaban/ali/uc_lahuo/uc_other_data_to_dmp.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCOtherDataToDmp \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 150 \ ../../${JAR} \ -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \ diff --git a/azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh b/azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh index cca6f3e..d67c106 100644 --- a/azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh +++ b/azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh @@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCOtherDataToDmpV2 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 120 \ ../../${JAR} \ -output ${OUTPUT_PATH} \ diff --git a/azkaban/ali/uc_lahuo_to_guangdiantong/ali_extract_h_32_from_dsp_req.sh b/azkaban/ali/uc_lahuo_to_guangdiantong/ali_extract_h_32_from_dsp_req.sh index 539ffb4..954dad2 100644 --- a/azkaban/ali/uc_lahuo_to_guangdiantong/ali_extract_h_32_from_dsp_req.sh +++ b/azkaban/ali/uc_lahuo_to_guangdiantong/ali_extract_h_32_from_dsp_req.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlH32FromDmInstallListV2 \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 60 \ ../../${JAR} \ -h32_imei ${IMEI_H_32_GUANGDIANTONG_RES_PATH} -h32_imeimd5 ${IMEIMD5_H_32_GUANGDIANTONG_RES_PATH} \ diff --git a/azkaban/ali/youku_laxin/youku_imei_laxin_ck.sh b/azkaban/ali/youku_laxin/youku_imei_laxin_ck.sh index 8811645..417002c 100644 --- a/azkaban/ali/youku_laxin/youku_imei_laxin_ck.sh +++ b/azkaban/ali/youku_laxin/youku_imei_laxin_ck.sh @@ -23,7 +23,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.YOUKUImeiLaXinCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/ali/youku_laxin/youku_laxin_daily.sh b/azkaban/ali/youku_laxin/youku_laxin_daily.sh index 23fd647..13df798 100644 --- a/azkaban/ali/youku_laxin/youku_laxin_daily.sh +++ b/azkaban/ali/youku_laxin/youku_laxin_daily.sh @@ -35,7 +35,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.YOUKULaXinDaily \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 80 ../../${JAR} \ -imeioutput "${YOUKU_IMEIMD5_OUTPUT_PATH}" -oaidoutput "${YOUKU_OAIDMD5_OUTPUT_PATH}" \ -input_one_day ${INPUT_ONE_DAY} -input_two_day ${INPUT_TWO_DAY} -input_three_day ${INPUT_THREE_DAY} \ diff --git a/azkaban/ali/youku_laxin/youku_laxin_data_to_dmp.sh b/azkaban/ali/youku_laxin/youku_laxin_data_to_dmp.sh index 008cdd6..de379f8 100644 --- a/azkaban/ali/youku_laxin/youku_laxin_data_to_dmp.sh +++ b/azkaban/ali/youku_laxin/youku_laxin_data_to_dmp.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.YoukuTmpDataToDmp \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -Input "${INPUT_PATH}/*/*" -Output ${OUTPUT_PATH} \ -update ${update} @@ -53,7 +52,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.YoukuLaXinPollingDataDedupli --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../../${JAR} -dt_today ${dt_today} -dt_begin_days ${dt_begin_days} \ -AcquisitionOutput ${ACQUISITIONOUTPUT} diff --git a/azkaban/ali/youku_laxin/youku_laxin_df.sh b/azkaban/ali/youku_laxin/youku_laxin_df.sh index 9a1b4d0..ee6613e 100644 --- a/azkaban/ali/youku_laxin/youku_laxin_df.sh +++ b/azkaban/ali/youku_laxin/youku_laxin_df.sh @@ -20,8 +20,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.YOUKULaxinDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 10 \ ../../${JAR} -date ${LOG_TIME} -dt_three_days_ago ${dt_three_days_ago} -partNum 10 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -oaidOutput ${OAIDMD5_OUTPUT_PATH} -cluster 'cluster_1st' diff --git a/azkaban/ali/youku_laxin/youku_oaid_laxin_ck.sh b/azkaban/ali/youku_laxin/youku_oaid_laxin_ck.sh index e02601f..878f97b 100644 --- a/azkaban/ali/youku_laxin/youku_oaid_laxin_ck.sh +++ b/azkaban/ali/youku_laxin/youku_oaid_laxin_ck.sh @@ -23,7 +23,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.YOUKUOaidLaXinCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -oaidInput ${OAIDMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/app_info/app_info_adr_3s.sh b/azkaban/app_info/app_info_adr_3s.sh index c4bb39a..a427a98 100644 --- a/azkaban/app_info/app_info_adr_3s.sh +++ b/azkaban/app_info/app_info_adr_3s.sh @@ -21,7 +21,7 @@ check_await "s3://mob-emr-test/leo.liang/dmp_dummper/all_3s_android.json.log" #--deploy-mode cluster spark-submit --class mobvista.dmp.datasource.app_info_tag.App_info_adr_3s \ --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \ - --master yarn --deploy-mode cluster --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml --name app_info_adr_3s --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ + --master yarn --deploy-mode cluster --name app_info_adr_3s --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ ../${JAR} if [ $? -ne 0 ];then exit 255 diff --git a/azkaban/app_info/app_info_ios_3s.sh b/azkaban/app_info/app_info_ios_3s.sh index b1cc6bf..283e909 100644 --- a/azkaban/app_info/app_info_ios_3s.sh +++ b/azkaban/app_info/app_info_ios_3s.sh @@ -19,7 +19,7 @@ check_await "s3://mob-emr-test/leo.liang/dmp_dummper/all_3s_ios.json.log" spark-submit --class mobvista.dmp.datasource.app_info_tag.App_info_ios_3s \ --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \ - --master yarn --deploy-mode cluster --name app_info_ios_3s --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ + --master yarn --deploy-mode cluster --name app_info_ios_3s --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ ../${JAR} if [ $? -ne 0 ];then exit 255 diff --git a/azkaban/app_info/collect_package_name.sh b/azkaban/app_info/collect_package_name.sh index 7c3f5b2..3846ea1 100644 --- a/azkaban/app_info/collect_package_name.sh +++ b/azkaban/app_info/collect_package_name.sh @@ -83,7 +83,6 @@ install() { --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ --conf spark.kryoserializer.buffer.max=512m \ --conf spark.driver.maxResultSize=4g \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --name CrawPkgsSpark --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 100 \ ../${JAR} -pkginstallpath ${PACKAGE_INSTALL_PATH} -coalesce 10 \ -yesday ${YESTERDAY} diff --git a/azkaban/app_info/etl_app_info_adr.sh b/azkaban/app_info/etl_app_info_adr.sh index 22fc7f7..551a03f 100644 --- a/azkaban/app_info/etl_app_info_adr.sh +++ b/azkaban/app_info/etl_app_info_adr.sh @@ -24,15 +24,13 @@ echo "dim_app_info_adr file success exist, and then can start" : ' spark-submit --class mobvista.dmp.datasource.app_info_tag.Etl_app_info_adr \ --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \ - --master yarn --deploy-mode cluster --name etl_app_info_adr --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml --driver-memory 4g --executor-cores 2 --num-executors 30 \ + --master yarn --deploy-mode cluster --name etl_app_info_adr --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ ../${JAR} ' spark-submit --class mobvista.dmp.datasource.app_info_tag.Etl_app_info_adr_v2 \ --name "mobvista.dmp.datasource.app_info_tag.Etl_app_info_adr_v2_wangjf_${LOG_TIME}" \ --conf spark.yarn.executor.memoryOverhead=2048 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${LOG_TIME} -output ${OUTPUT_PATH} diff --git a/azkaban/app_info/etl_app_info_ios.sh b/azkaban/app_info/etl_app_info_ios.sh index 9290dab..f5cf8d4 100644 --- a/azkaban/app_info/etl_app_info_ios.sh +++ b/azkaban/app_info/etl_app_info_ios.sh @@ -26,15 +26,13 @@ echo "dim_app_info_ios file success exist, and then can start" : ' spark-submit --class mobvista.dmp.datasource.app_info_tag.Etl_app_info_ios \ --conf spark.yarn.executor.memoryOverhead=2048 --conf spark.network.timeout=720s \ ---master yarn --deploy-mode cluster --name etl_app_info_ios --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml \ +--master yarn --deploy-mode cluster --name etl_app_info_ios --conf spark.app.loadTime=${LOG_TIME} --conf spark.app.output_path=${OUTPUT_PATH} --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \ ../${JAR} ' spark-submit --class mobvista.dmp.datasource.app_info_tag.Etl_app_info_ios_v2 \ --name "mobvista.dmp.datasource.app_info_tag.Etl_app_info_ios_v2_wangjf_${LOG_TIME}" \ --conf spark.yarn.executor.memoryOverhead=2048 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${LOG_TIME} -output ${OUTPUT_PATH} diff --git a/azkaban/app_info/import_campaign_tags.sh b/azkaban/app_info/import_campaign_tags.sh index c814ded..b6e56c7 100644 --- a/azkaban/app_info/import_campaign_tags.sh +++ b/azkaban/app_info/import_campaign_tags.sh @@ -28,7 +28,6 @@ hadoop fs -rmr $output_path spark-submit --class mobvista.dmp.datasource.newtag.ImportCampaignTags \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=5 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../${JAR} -newPath ${new_path} -output $output_path diff --git a/azkaban/app_info/import_package_tags.sh b/azkaban/app_info/import_package_tags.sh index 7e80dd7..667c9cb 100644 --- a/azkaban/app_info/import_package_tags.sh +++ b/azkaban/app_info/import_package_tags.sh @@ -31,7 +31,6 @@ hadoop fs -rmr $output_path spark-submit --class mobvista.dmp.datasource.newtag.ImportPkgTags \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=5 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../${JAR} -newPath ${new_path} -output $output_path diff --git a/azkaban/app_info/package_black_list.sh b/azkaban/app_info/package_black_list.sh index 9234079..ef27f84 100644 --- a/azkaban/app_info/package_black_list.sh +++ b/azkaban/app_info/package_black_list.sh @@ -20,7 +20,6 @@ EXPIRE_PATH="${PACKAGE_BLACK_LIST}/$expire_path" spark-submit --class mobvista.dmp.main.PackageBlackList \ --conf spark.sql.shuffle.partitions=10 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date "${yes_date}" \ diff --git a/azkaban/app_tag/app_tag.sh b/azkaban/app_tag/app_tag.sh index 42b3745..923a838 100755 --- a/azkaban/app_tag/app_tag.sh +++ b/azkaban/app_tag/app_tag.sh @@ -15,7 +15,6 @@ spark-submit --class mobvista.dmp.datasource.apptag.AppTagJob \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf dfs.socket.timeout=300000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} -output ${output_path} diff --git a/azkaban/appsflyer/appsflyer_total.sh b/azkaban/appsflyer/appsflyer_total.sh index 906139f..283ffdf 100644 --- a/azkaban/appsflyer/appsflyer_total.sh +++ b/azkaban/appsflyer/appsflyer_total.sh @@ -62,7 +62,6 @@ $HIVE_CMD -v -hivevar dt_today ${dt_today} -hivevar update_date ${dt_today --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name apps_flyer_total --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \ ../${JAR} -outputtotal ${OUTPUT_TOTAL_PATH} -dmpuserinfo ${DMP_USER_INFO_OUTPUT_PATH} \ diff --git a/azkaban/bigmedia_domestic/bigmedia_domestic_launch_total.sh b/azkaban/bigmedia_domestic/bigmedia_domestic_launch_total.sh index d8b5489..fc714c8 100644 --- a/azkaban/bigmedia_domestic/bigmedia_domestic_launch_total.sh +++ b/azkaban/bigmedia_domestic/bigmedia_domestic_launch_total.sh @@ -52,7 +52,6 @@ spark-submit --class mobvista.dmp.datasource.bigmedia_domestic.BigMediaDomestic --conf spark.default.parallelism=400 \ --conf spark.sql.shuffle.partitions=400 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name BigMediaDomestic --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 36 \ ../${JAR} -bigmediainput ${INPUT_PATH_DAILY} -outputdaily ${OUTPUT_BIGMEDIA_DAILY_PATH} -outputgender ${GENDER_BIG_MEDIA_OUTPUT_PATH} -coalesce 50 \ diff --git a/azkaban/bytedance/bytedance_device_tag.sh b/azkaban/bytedance/bytedance_device_tag.sh index e26f74c..4473c43 100644 --- a/azkaban/bytedance/bytedance_device_tag.sh +++ b/azkaban/bytedance/bytedance_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=50 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 20 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/bytedance/bytedance_device_tag_daily.sh b/azkaban/bytedance/bytedance_device_tag_daily.sh index f204a9e..05ba502 100644 --- a/azkaban/bytedance/bytedance_device_tag_daily.sh +++ b/azkaban/bytedance/bytedance_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.bytedance.ByteDanceTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5 diff --git a/azkaban/bytedance/bytedance_device_tag_v2.sh b/azkaban/bytedance/bytedance_device_tag_v2.sh index 0b73c85..3ff304f 100644 --- a/azkaban/bytedance/bytedance_device_tag_v2.sh +++ b/azkaban/bytedance/bytedance_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=8 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 8 diff --git a/azkaban/bytedance/bytedance_install_total_orc.sh b/azkaban/bytedance/bytedance_install_total_orc.sh index 500effd..e925173 100644 --- a/azkaban/bytedance/bytedance_install_total_orc.sh +++ b/azkaban/bytedance/bytedance_install_total_orc.sh @@ -35,8 +35,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=20 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 10 diff --git a/azkaban/clever/adn_clever_device_tag.sh b/azkaban/clever/adn_clever_device_tag.sh index aa82752..e0fecab 100644 --- a/azkaban/clever/adn_clever_device_tag.sh +++ b/azkaban/clever/adn_clever_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=100 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 20 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100 diff --git a/azkaban/clever/adn_clever_device_tag_daily.sh b/azkaban/clever/adn_clever_device_tag_daily.sh index c3b4f5e..3961cf5 100644 --- a/azkaban/clever/adn_clever_device_tag_daily.sh +++ b/azkaban/clever/adn_clever_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.clever.AdnCleverTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1 diff --git a/azkaban/clever/adn_clever_device_tag_v2.sh b/azkaban/clever/adn_clever_device_tag_v2.sh index 928175b..3a180d3 100644 --- a/azkaban/clever/adn_clever_device_tag_v2.sh +++ b/azkaban/clever/adn_clever_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=4 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4 diff --git a/azkaban/clever/adn_clever_install_total_orc.sh b/azkaban/clever/adn_clever_install_total_orc.sh index 269d495..1af900e 100644 --- a/azkaban/clever/adn_clever_install_total_orc.sh +++ b/azkaban/clever/adn_clever_install_total_orc.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=20 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 10 diff --git a/azkaban/clever/adn_clever_install_v2.sh b/azkaban/clever/adn_clever_install_v2.sh index bf1640f..c73c11e 100644 --- a/azkaban/clever/adn_clever_install_v2.sh +++ b/azkaban/clever/adn_clever_install_v2.sh @@ -36,8 +36,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../${JAR} \ -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/datatory/datatory.sh b/azkaban/datatory/datatory.sh index f9eed48..6585c3b 100644 --- a/azkaban/datatory/datatory.sh +++ b/azkaban/datatory/datatory.sh @@ -35,8 +35,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.DatatoryJob \ --conf spark.default.parallelism=${partition} \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory ${execNum}g --driver-memory 6g --executor-cores 4 --num-executors 150 \ ../${JAR} -json ${json// /&@} -tag "${tag}" -coalesce 1 diff --git a/azkaban/datatory/tracking/3s_tracking_event_daily.sh b/azkaban/datatory/tracking/3s_tracking_event_daily.sh index d7280dd..ac80597 100644 --- a/azkaban/datatory/tracking/3s_tracking_event_daily.sh +++ b/azkaban/datatory/tracking/3s_tracking_event_daily.sh @@ -28,8 +28,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.TrackingEventDaily \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../../${JAR} -date ${date} -output ${output_path} -info_output ${info_output_path} -before_date ${before_date} diff --git a/azkaban/datatory/tracking/3s_trackingnew_install_daily.sh b/azkaban/datatory/tracking/3s_trackingnew_install_daily.sh index 8736db8..18d8402 100644 --- a/azkaban/datatory/tracking/3s_trackingnew_install_daily.sh +++ b/azkaban/datatory/tracking/3s_trackingnew_install_daily.sh @@ -26,8 +26,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.TrackingInstallDaily \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../../${JAR} -date ${date} -output ${output_path} diff --git a/azkaban/datatory/tracking/adn_tracking_ck.sh b/azkaban/datatory/tracking/adn_tracking_ck.sh index 4e5ef16..747b897 100644 --- a/azkaban/datatory/tracking/adn_tracking_ck.sh +++ b/azkaban/datatory/tracking/adn_tracking_ck.sh @@ -23,7 +23,6 @@ spark-submit --class mobvista.dmp.clickhouse.tracking.AdnTrackingDaily \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../../${JAR} -date ${date} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/datatory/tracking/adn_trackingnew_event_daily.sh b/azkaban/datatory/tracking/adn_trackingnew_event_daily.sh index a13a913..c8af887 100644 --- a/azkaban/datatory/tracking/adn_trackingnew_event_daily.sh +++ b/azkaban/datatory/tracking/adn_trackingnew_event_daily.sh @@ -25,8 +25,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.AdnTrackingEventDaily \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../../${JAR} -date ${date} -output ${output_path} diff --git a/azkaban/datatory/tracking/adn_trackingnew_install_daily.sh b/azkaban/datatory/tracking/adn_trackingnew_install_daily.sh index cca5dab..b6e5f6e 100644 --- a/azkaban/datatory/tracking/adn_trackingnew_install_daily.sh +++ b/azkaban/datatory/tracking/adn_trackingnew_install_daily.sh @@ -25,8 +25,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.AdnTrackingInstallDaily \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../../${JAR} -date ${date} -output ${output_path} diff --git a/azkaban/datatory/tracking/adn_trackingnew_merge_daily.sh b/azkaban/datatory/tracking/adn_trackingnew_merge_daily.sh index 0830a04..e66e137 100644 --- a/azkaban/datatory/tracking/adn_trackingnew_merge_daily.sh +++ b/azkaban/datatory/tracking/adn_trackingnew_merge_daily.sh @@ -25,8 +25,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.AdnTrackingMergeDaily \ --conf spark.sql.shuffle.partitions=100 \ --conf spark.default.parallelism=100 \ --conf spark.sql.files.maxPartitionBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 3 --num-executors 5 \ ../../${JAR} -date ${date} -output ${output_path} diff --git a/azkaban/datatory/tracking/tracking_ck.sh b/azkaban/datatory/tracking/tracking_ck.sh index ae1f4b6..e28f4be 100644 --- a/azkaban/datatory/tracking/tracking_ck.sh +++ b/azkaban/datatory/tracking/tracking_ck.sh @@ -30,7 +30,6 @@ spark-submit --class mobvista.dmp.clickhouse.tracking.TrackingDaily \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../../${JAR} -date ${date} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/datatory/tracking/trackingnew_merge_daily.sh b/azkaban/datatory/tracking/trackingnew_merge_daily.sh index 95b9d1a..e6ea59d 100644 --- a/azkaban/datatory/tracking/trackingnew_merge_daily.sh +++ b/azkaban/datatory/tracking/trackingnew_merge_daily.sh @@ -23,8 +23,6 @@ spark-submit --class mobvista.dmp.datasource.datatory.TrackingMergeDaily \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../../${JAR} -date ${date} -output ${output_path} diff --git a/azkaban/datatory/user_info/user_info_ck.sh b/azkaban/datatory/user_info/user_info_ck.sh index b9d0ec8..292f1f6 100644 --- a/azkaban/datatory/user_info/user_info_ck.sh +++ b/azkaban/datatory/user_info/user_info_ck.sh @@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.clickhouse.feature.UserInfo \ --conf spark.kryoserializer.buffer.max=512m \ --conf spark.kryoserializer.buffer=64m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 10 \ ../../${JAR} -date ${date} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/dm/dm_active_tag_month.sh b/azkaban/dm/dm_active_tag_month.sh index 403b5ef..ac3e853 100644 --- a/azkaban/dm/dm_active_tag_month.sh +++ b/azkaban/dm/dm_active_tag_month.sh @@ -64,8 +64,6 @@ spark-submit --class mobvista.dmp.datasource.dm.ActiveTag \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 60 \ ../${JAR} \ -date ${date} -output ${OUTPUT_PATH} -coalesce 1000 -days 29 diff --git a/azkaban/dm/dm_active_tag_week.sh b/azkaban/dm/dm_active_tag_week.sh index f278938..89dc14c 100644 --- a/azkaban/dm/dm_active_tag_week.sh +++ b/azkaban/dm/dm_active_tag_week.sh @@ -66,8 +66,6 @@ spark-submit --class mobvista.dmp.datasource.dm.ActiveTag \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 40 \ ../${JAR} \ -date ${date} -output ${OUTPUT_PATH} -coalesce 1000 -days 6 diff --git a/azkaban/dm/dm_interest_tag_all_v2.sh b/azkaban/dm/dm_interest_tag_all_v2.sh index e40a100..ebc6ad9 100644 --- a/azkaban/dm/dm_interest_tag_all_v2.sh +++ b/azkaban/dm/dm_interest_tag_all_v2.sh @@ -48,8 +48,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmpDeviceInterest \ --conf spark.kryoserializer.buffer=64m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 100 \ ../${JAR} \ -date ${date} -output ${OUTPUT_PATH} diff --git a/azkaban/dm/dm_interest_tag_all_v3.sh b/azkaban/dm/dm_interest_tag_all_v3.sh index 55c9d5a..ee3b0f0 100644 --- a/azkaban/dm/dm_interest_tag_all_v3.sh +++ b/azkaban/dm/dm_interest_tag_all_v3.sh @@ -60,8 +60,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInterestTagAllV2 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 3 --num-executors 256 \ ../${JAR} \ -output ${OUTPUT_PATH} -date ${date} -ga_date ${ga_date} -coalesce 5000 diff --git a/azkaban/dm/dmp_device_id_md5.sh b/azkaban/dm/dmp_device_id_md5.sh index 1c99c71..0f5d561 100644 --- a/azkaban/dm/dmp_device_id_md5.sh +++ b/azkaban/dm/dmp_device_id_md5.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmpDeviceIdMd5 \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 60 \ ../${JAR} \ -date ${date} -coalesce 1000 -output $OUTPUT_PATH diff --git a/azkaban/dm/dmp_device_interest.sh b/azkaban/dm/dmp_device_interest.sh index 1cc1719..355157c 100644 --- a/azkaban/dm/dmp_device_interest.sh +++ b/azkaban/dm/dmp_device_interest.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmpDeviceInterest \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 80 \ ../${JAR} \ -date ${date} -output ${OUTPUT_PATH} -coalesce 4000 diff --git a/azkaban/dm/dmp_device_tag_daily.sh b/azkaban/dm/dmp_device_tag_daily.sh index 9d832fa..82cb5ca 100644 --- a/azkaban/dm/dmp_device_tag_daily.sh +++ b/azkaban/dm/dmp_device_tag_daily.sh @@ -39,7 +39,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 6g --executor-cores 5 --num-executors 80 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4000 diff --git a/azkaban/dm/dmp_install_list_common.sh b/azkaban/dm/dmp_install_list_common.sh index 4198c1d..dc39598 100644 --- a/azkaban/dm/dmp_install_list_common.sh +++ b/azkaban/dm/dmp_install_list_common.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.common.Old2NewInstallList \ --conf spark.default.parallelism=${PARTITION} \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors ${EXECUTOR} \ ../${JAR} -date ${LOG_TIME} -business ${BUSINESS} -output ${OUTPUT} -coalesce ${COALESCE} diff --git a/azkaban/dm/dmp_install_list_daily.sh b/azkaban/dm/dmp_install_list_daily.sh index a4d5973..e605f33 100644 --- a/azkaban/dm/dmp_install_list_daily.sh +++ b/azkaban/dm/dmp_install_list_daily.sh @@ -60,7 +60,6 @@ spark-submit --class mobvista.dmp.common.InstallListDaily \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ --conf spark.kubernetes.memoryOverheadFactor=0.2 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 160 \ ../${JAR} -date ${date} -coalesce 3000 -output ${OUTPUT_PATH} diff --git a/azkaban/dm/dmp_install_list_daily_v2.sh b/azkaban/dm/dmp_install_list_daily_v2.sh index 6a83596..a452892 100644 --- a/azkaban/dm/dmp_install_list_daily_v2.sh +++ b/azkaban/dm/dmp_install_list_daily_v2.sh @@ -58,7 +58,6 @@ spark-submit --class mobvista.dmp.common.InstallListDailyV2 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ --conf spark.kubernetes.memoryOverheadFactor=0.2 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 3 --num-executors 256 \ ../${JAR} -date ${date} -coalesce 4000 -output ${OUTPUT_PATH} diff --git a/azkaban/dm/dmp_install_list_merge.sh b/azkaban/dm/dmp_install_list_merge.sh index 9b19c0e..63eac36 100644 --- a/azkaban/dm/dmp_install_list_merge.sh +++ b/azkaban/dm/dmp_install_list_merge.sh @@ -41,7 +41,6 @@ spark-submit --class mobvista.dmp.common.InstallListMerge \ --conf spark.driver.maxResultSize=8g \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.files.maxPartitionBytes=134217728 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 100 \ ../${JAR} -date ${date} -coalesce 4000 -output ${OUTPUT_PATH} diff --git a/azkaban/dm/fix_dmp_install_list.sh b/azkaban/dm/fix_dmp_install_list.sh index b7ca1d2..45c0c54 100644 --- a/azkaban/dm/fix_dmp_install_list.sh +++ b/azkaban/dm/fix_dmp_install_list.sh @@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixDmpInstallList \ --conf spark.kryoserializer.buffer=64m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../${JAR} \ -date ${date} -coalesce 4000 -output $OUTPUT_PATH -business ${BUSINESS} diff --git a/azkaban/dm/fix_install_list_ruid.sh b/azkaban/dm/fix_install_list_ruid.sh index 10db7f0..59eb39b 100644 --- a/azkaban/dm/fix_install_list_ruid.sh +++ b/azkaban/dm/fix_install_list_ruid.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../${JAR} \ -date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/dm/install_list_other_v2.sh b/azkaban/dm/install_list_other_v2.sh index 54b9992..a45b535 100644 --- a/azkaban/dm/install_list_other_v2.sh +++ b/azkaban/dm/install_list_other_v2.sh @@ -37,8 +37,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../${JAR} -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/dm/install_other_device_tag.sh b/azkaban/dm/install_other_device_tag.sh index 1d6021f..4aeb8e9 100644 --- a/azkaban/dm/install_other_device_tag.sh +++ b/azkaban/dm/install_other_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=200 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 40 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 200 diff --git a/azkaban/dm/other_device_tag_v2.sh b/azkaban/dm/other_device_tag_v2.sh index 8885cef..8869495 100644 --- a/azkaban/dm/other_device_tag_v2.sh +++ b/azkaban/dm/other_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=4 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4 diff --git a/azkaban/dm/other_install_total_orc.sh b/azkaban/dm/other_install_total_orc.sh index 5a3c891..dd668b0 100644 --- a/azkaban/dm/other_install_total_orc.sh +++ b/azkaban/dm/other_install_total_orc.sh @@ -29,8 +29,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=8 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 4 diff --git a/azkaban/dm/pseudo_package_to_other_business/Age_Package_Names.sh b/azkaban/dm/pseudo_package_to_other_business/Age_Package_Names.sh index 8f61bee..185107f 100644 --- a/azkaban/dm/pseudo_package_to_other_business/Age_Package_Names.sh +++ b/azkaban/dm/pseudo_package_to_other_business/Age_Package_Names.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.dm.AgePackageNames \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 220 \ ../../${JAR} -dt_today ${dt_today} -update ${update} -Age_Package_Names ${Age_Package_Names} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/Canglan_Package_Names.sh b/azkaban/dm/pseudo_package_to_other_business/Canglan_Package_Names.sh index bf6b12c..230bba2 100644 --- a/azkaban/dm/pseudo_package_to_other_business/Canglan_Package_Names.sh +++ b/azkaban/dm/pseudo_package_to_other_business/Canglan_Package_Names.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.dm.CanglanPackageNames \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 220 \ ../../${JAR} -dt_today ${dt_today} -update ${update} -Canglan_Package_Names ${Canglan_Package_Names} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/Three_Kingdoms_Game.sh b/azkaban/dm/pseudo_package_to_other_business/Three_Kingdoms_Game.sh index 4f75df3..9da7cc6 100644 --- a/azkaban/dm/pseudo_package_to_other_business/Three_Kingdoms_Game.sh +++ b/azkaban/dm/pseudo_package_to_other_business/Three_Kingdoms_Game.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.dm.ThreeKingdomsGame \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 220 \ ../../${JAR} -dt_today ${dt_today} -update ${update} -package_names_input ${Three_Kingdoms_Package_Names} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/adx_packagename_synchronize.sh b/azkaban/dm/pseudo_package_to_other_business/adx_packagename_synchronize.sh index 1fe85fb..0514bb6 100644 --- a/azkaban/dm/pseudo_package_to_other_business/adx_packagename_synchronize.sh +++ b/azkaban/dm/pseudo_package_to_other_business/adx_packagename_synchronize.sh @@ -21,7 +21,6 @@ spark-submit --class mobvista.dmp.datasource.dm.AdxPackagenameSynchronize \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.driver.maxResultSize=4g \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 50 \ ../../${JAR} -dt_today ${dt_today} -output1 ${OUTPUT_PATH1} \ -dt_day ${dt_day} \ diff --git a/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv.sh b/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv.sh index 03cfbae..57fa383 100644 --- a/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv.sh +++ b/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv.sh @@ -30,7 +30,6 @@ spark-submit --class mobvista.dmp.datasource.dm.BtopTiktokrv \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 70 \ ../../${JAR} -begin_day ${begin_day} -begin_day02 ${begin_day02} -end_day ${end_day} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -output03 ${OUTPUT_PATH03} -output04 ${OUTPUT_PATH04} -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv_gaid.sh b/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv_gaid.sh index 41c2709..474bf7b 100644 --- a/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv_gaid.sh +++ b/azkaban/dm/pseudo_package_to_other_business/btop_tiktokrv_gaid.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.dm.BtopTiktokrvGaid \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.driver.maxResultSize=4g \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 70 \ ../../${JAR} -begin_day ${begin_day} -begin_day02 ${begin_day02} -end_day ${end_day} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -output03 ${OUTPUT_PATH03} -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/cn_good_channel.sh b/azkaban/dm/pseudo_package_to_other_business/cn_good_channel.sh index 975df26..0325161 100644 --- a/azkaban/dm/pseudo_package_to_other_business/cn_good_channel.sh +++ b/azkaban/dm/pseudo_package_to_other_business/cn_good_channel.sh @@ -35,7 +35,6 @@ spark-submit --class mobvista.dmp.datasource.dm.CnGoodChannel \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.driver.maxResultSize=4g \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 100 \ ../../${JAR} -output1 ${OUTPUT_PATH1} -output2 ${OUTPUT_PATH2} -output3 ${OUTPUT_PATH3} \ -output4 ${OUTPUT_PATH4} -output5 ${OUTPUT_PATH5} -old_data_path ${OLD_DATA_PATH} \ diff --git a/azkaban/dm/pseudo_package_to_other_business/com_eg_android_AlipayGphone_reyun.sh b/azkaban/dm/pseudo_package_to_other_business/com_eg_android_AlipayGphone_reyun.sh index e0b2081..cd8cb8b 100644 --- a/azkaban/dm/pseudo_package_to_other_business/com_eg_android_AlipayGphone_reyun.sh +++ b/azkaban/dm/pseudo_package_to_other_business/com_eg_android_AlipayGphone_reyun.sh @@ -36,7 +36,6 @@ spark-submit --class mobvista.dmp.datasource.dm.ComEgAndroidAlipayGphoneReyun \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.driver.maxResultSize=4g \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 10 \ ../../${JAR} -dt_today ${dt_today} -output1 ${OUTPUT_PATH1} -output2 ${OUTPUT_PATH2} -update ${update} \ -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/phone_wax_nobid_to_dmp.sh b/azkaban/dm/pseudo_package_to_other_business/phone_wax_nobid_to_dmp.sh index ce3646f..13efc9d 100644 --- a/azkaban/dm/pseudo_package_to_other_business/phone_wax_nobid_to_dmp.sh +++ b/azkaban/dm/pseudo_package_to_other_business/phone_wax_nobid_to_dmp.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.dm.YoukuPhoneWaxNobid \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 150 \ ../../${JAR} -begin_day ${begin_day} -end_day ${end_day} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/reyun_lahuo_list.sh b/azkaban/dm/pseudo_package_to_other_business/reyun_lahuo_list.sh index 34d62f6..85775e4 100644 --- a/azkaban/dm/pseudo_package_to_other_business/reyun_lahuo_list.sh +++ b/azkaban/dm/pseudo_package_to_other_business/reyun_lahuo_list.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.dm.ReyunLahuoList \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \ ../../${JAR} -dt_today ${dt_today} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -update_date ${update_date}\ -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/rtdmp_normal.sh b/azkaban/dm/pseudo_package_to_other_business/rtdmp_normal.sh index a46a38b..a127d58 100644 --- a/azkaban/dm/pseudo_package_to_other_business/rtdmp_normal.sh +++ b/azkaban/dm/pseudo_package_to_other_business/rtdmp_normal.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.dm.RtdmpNormal \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 30 \ ../../${JAR} -dt_today ${dt_today} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -rtdmp_normal_count_result "${RTDMP_NORMAL_COUNT_RESULT}/${dt_slash_today}" -coalesce 200 diff --git a/azkaban/dm/pseudo_package_to_other_business/rtdmp_tmp_id1142110895.sh b/azkaban/dm/pseudo_package_to_other_business/rtdmp_tmp_id1142110895.sh index 8b0a93b..06d8a81 100644 --- a/azkaban/dm/pseudo_package_to_other_business/rtdmp_tmp_id1142110895.sh +++ b/azkaban/dm/pseudo_package_to_other_business/rtdmp_tmp_id1142110895.sh @@ -17,7 +17,6 @@ spark-submit --class mobvista.dmp.datasource.dm.RtdmpTmpId1142110895 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.driver.maxResultSize=4g \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 70 \ ../../${JAR} -dt_today ${dt_today} -output1 ${OUTPUT_PATH1} \ -coalesce 420 diff --git a/azkaban/dm/pseudo_package_to_other_business/shinny.sh b/azkaban/dm/pseudo_package_to_other_business/shinny.sh index e037e21..0019aa1 100644 --- a/azkaban/dm/pseudo_package_to_other_business/shinny.sh +++ b/azkaban/dm/pseudo_package_to_other_business/shinny.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.dm.ShinnyPackageNames \ --conf spark.default.parallelism=3000 \ --conf spark.sql.shuffle.partitions=3000 \ --conf spark.network.timeout=720s \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 220 \ ../../${JAR} -dt_today ${dt_today} -update ${update} -Shinny_Package_Names ${Shinny_Package_Names} -output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} -coalesce 200 diff --git a/azkaban/dmp_env.sh b/azkaban/dmp_env.sh index 458a4db..0d5fb4b 100644 --- a/azkaban/dmp_env.sh +++ b/azkaban/dmp_env.sh @@ -849,8 +849,6 @@ userInfoJob() { --conf spark.speculation=true \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1 \ - --jars ${JARS} \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ${jar} -date $LOG_TIME -dailyPath $dailyPath -agePath $agePath -genderPath $genderPath \ -dailyFormat ${dailyFormat} -dailyDidIndex $dailyDidIndex -dailyDidTypeIndex $dailyDidTypeIndex -dailyPltIndex $dailyPltIndex -dailyCountryIndex $dailyCountryIndex \ @@ -898,8 +896,6 @@ userInfoJob_dsp_req() { --conf spark.speculation=true \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1 \ - --jars ${JARS} \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 80 \ ${jar} -date $LOG_TIME -dailyPath $dailyPath -agePath $agePath -genderPath $genderPath \ -dailyFormat ${dailyFormat} -dailyDidIndex $dailyDidIndex -dailyDidTypeIndex $dailyDidTypeIndex -dailyPltIndex $dailyPltIndex -dailyCountryIndex $dailyCountryIndex \ @@ -963,8 +959,6 @@ installListJob() { --conf spark.default.parallelism=${partitions} \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory ${exec_memory}g --driver-memory ${driver_memory}g --executor-cores 2 --num-executors ${executors} \ ${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce ${coalesce} if [[ $? -ne 0 ]];then diff --git a/azkaban/dmp_event_tag/dmp_event_tag.sh b/azkaban/dmp_event_tag/dmp_event_tag.sh index ed9a9c8..3a6eda6 100644 --- a/azkaban/dmp_event_tag/dmp_event_tag.sh +++ b/azkaban/dmp_event_tag/dmp_event_tag.sh @@ -17,8 +17,6 @@ spark-submit --class mobvista.dmp.datasource.event_tag.DmpEventTag \ --conf spark.sql.shuffle.partitions=600 \ --conf spark.default.parallelism=600 \ --conf spark.sql.broadcastTimeout=600 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-cores 3 --executor-memory 6G --num-executors 20 \ ../${JAR} \ -today ${date} -output ${OUTPUT_PATH} -coalesce 600 diff --git a/azkaban/dmp_event_tag/dmp_event_tag_pre.sh b/azkaban/dmp_event_tag/dmp_event_tag_pre.sh index 9108143..6d56506 100644 --- a/azkaban/dmp_event_tag/dmp_event_tag_pre.sh +++ b/azkaban/dmp_event_tag/dmp_event_tag_pre.sh @@ -10,8 +10,6 @@ date_path=$(date +%Y/%m/%d -d "-1 day $today") spark-submit --class mobvista.dmp.datasource.newtag.CalInterestTag \ --conf spark.network.timeout=720s \ --conf spark.sql.broadcastTimeout=1200\ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-cores 2 --driver-memory 4g --executor-memory 6G --num-executors 1 \ ../${JAR} \ -today ${date} diff --git a/azkaban/dmp_event_tag/event_3s_postback_tag.sh b/azkaban/dmp_event_tag/event_3s_postback_tag.sh index 888e770..5568ea3 100644 --- a/azkaban/dmp_event_tag/event_3s_postback_tag.sh +++ b/azkaban/dmp_event_tag/event_3s_postback_tag.sh @@ -46,7 +46,6 @@ spark-submit --class mobvista.dmp.datasource.event_tag.PostBackEvent \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 50 \ ../${JAR} -output ${OUTPUT_PATH} -coalesce 200 \ diff --git a/azkaban/dmp_event_tag/event_3s_tag.sh b/azkaban/dmp_event_tag/event_3s_tag.sh index 67d14cd..b39fbc7 100644 --- a/azkaban/dmp_event_tag/event_3s_tag.sh +++ b/azkaban/dmp_event_tag/event_3s_tag.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.event_tag.Dmp3sEventTag \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ --name dmp_3s_event_tag \ --conf spark.akka.frameSize=100 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --driver-memory 4g --executor-cores 2 --executor-memory 4G --num-executors 20 \ ../${JAR} \ -today ${date} -input ${input_path} -output ${OUTPUT_PATH} -coalesce 100 diff --git a/azkaban/dmp_event_tag/event_ga_tag.sh b/azkaban/dmp_event_tag/event_ga_tag.sh index b391380..32486b4 100644 --- a/azkaban/dmp_event_tag/event_ga_tag.sh +++ b/azkaban/dmp_event_tag/event_ga_tag.sh @@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.event_tag.DmpGaEventTag \ --conf spark.shuffle.compress=true \ --conf spark.shuffle.spill.compress=true \ --name dmp_ga_event_tag \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/json-serde-1.3.7-jar-with-dependencies.jar \ --master yarn --deploy-mode cluster --driver-memory 4g --executor-cores 2 --executor-memory 4G --num-executors 160 \ ../${JAR} \ diff --git a/azkaban/dmp_event_tag/event_ss_tag.sh b/azkaban/dmp_event_tag/event_ss_tag.sh index 146fb16..017244e 100644 --- a/azkaban/dmp_event_tag/event_ss_tag.sh +++ b/azkaban/dmp_event_tag/event_ss_tag.sh @@ -36,7 +36,6 @@ spark-submit --class mobvista.dmp.datasource.event_tag.DmpSsEventTag \ --conf spark.shuffle.consolidateFiles=true \ --conf spark.dynamicAllocation.enabled=false \ --conf spark.akka.frameSize=100 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --driver-memory 4g --executor-cores 2 --executor-memory 4G --num-executors 20 \ ../${JAR} \ -today ${date} -input "${input_path}/*/*" -output ${OUTPUT_PATH} -coalesce 100 diff --git a/azkaban/dsp/dsp_dc_interest.sh b/azkaban/dsp/dsp_dc_interest.sh index aa67db8..c8d44ad 100644 --- a/azkaban/dsp/dsp_dc_interest.sh +++ b/azkaban/dsp/dsp_dc_interest.sh @@ -39,7 +39,6 @@ hadoop fs -rm -r ${OUTPUT_PATH} spark-submit --class mobvista.dmp.datasource.dsp.dc.interest.DmDCInterestTagV2 \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=500 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 40 \ ../${JAR} -input $DSP_REQ_DAILY_PATH -output $OUTPUT_PATH -yestoday $old_date \ -dictPath $SEGMENT_TAG_PATH -parallelism 500 -coalesce 500 -date $modify_date diff --git a/azkaban/dsp/dsp_device_mapping.sh b/azkaban/dsp/dsp_device_mapping.sh index 8217866..9e41e3c 100644 --- a/azkaban/dsp/dsp_device_mapping.sh +++ b/azkaban/dsp/dsp_device_mapping.sh @@ -20,7 +20,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspDeviceIdMapping \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 100 \ ../${JAR} -date ${dt} -output ${OUTPUT} -coalesce 100 diff --git a/azkaban/dsp/dsp_etl_daily_ext.sh b/azkaban/dsp/dsp_etl_daily_ext.sh index 4e504a4..fe00795 100644 --- a/azkaban/dsp/dsp_etl_daily_ext.sh +++ b/azkaban/dsp/dsp_etl_daily_ext.sh @@ -51,7 +51,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspReqImeiDealDaily \ --conf spark.sql.shuffle.partitions=1000 \ --conf spark.default.parallelism=200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name DspReqImeiDealDaily --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 20 \ ../${JAR} -output "$OUTPUT_PATH" -coalesce 200 \ diff --git a/azkaban/dsp/dsp_org_etl_daily.sh b/azkaban/dsp/dsp_org_etl_daily.sh index d92c45e..d0d29b7 100644 --- a/azkaban/dsp/dsp_org_etl_daily.sh +++ b/azkaban/dsp/dsp_org_etl_daily.sh @@ -20,7 +20,6 @@ hadoop fs -rm -r ${TMP_OUTPUT_PATH} spark-submit --class mobvista.dmp.datasource.dsp.DspOrgEtlDailys \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=10000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 100 \ ../${JAR} -input $ETL_DSP_REQ_ETL_HOURS_INPUT_PATH \ -output ${TMP_OUTPUT_PATH} \ diff --git a/azkaban/dsp/dsp_org_etl_hours.sh b/azkaban/dsp/dsp_org_etl_hours.sh index e2365ac..2e1c23c 100644 --- a/azkaban/dsp/dsp_org_etl_hours.sh +++ b/azkaban/dsp/dsp_org_etl_hours.sh @@ -36,7 +36,6 @@ hadoop fs -rmr -r $MDS_DSP_REQ_HOURS_PATH spark-submit --class mobvista.dmp.datasource.dsp.DspOrgLogEtlHoursDemo \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 12g --driver-memory 6g --executor-cores 5 --num-executors 60 \ ../${JAR} -yyyymmddhh $input_date -output $ETL_DSP_REQ_HOURS_PATH -outputmds $MDS_DSP_REQ_HOURS_PATH -coalesce 2000 || exit 1 diff --git a/azkaban/dsp/dsp_req_device_tag.sh b/azkaban/dsp/dsp_req_device_tag.sh index fae1cb3..2917d05 100644 --- a/azkaban/dsp/dsp_req_device_tag.sh +++ b/azkaban/dsp/dsp_req_device_tag.sh @@ -41,7 +41,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.shuffle.file.buffer.kb=128 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 3 --num-executors 256 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5000 diff --git a/azkaban/dsp/dsp_req_device_tag_daily.sh b/azkaban/dsp/dsp_req_device_tag_daily.sh index 67eb628..6928bd4 100644 --- a/azkaban/dsp/dsp_req_device_tag_daily.sh +++ b/azkaban/dsp/dsp_req_device_tag_daily.sh @@ -35,8 +35,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspReqTagDaily \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.memory.offHeap.enabled=true \ --conf spark.memory.offHeap.size=4294967296 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 150 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 200 diff --git a/azkaban/dsp/dsp_req_device_tag_v2.sh b/azkaban/dsp/dsp_req_device_tag_v2.sh index 2f9577a..85d2275 100644 --- a/azkaban/dsp/dsp_req_device_tag_v2.sh +++ b/azkaban/dsp/dsp_req_device_tag_v2.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.storage.memoryFraction=0.5 \ --conf spark.shuffle.memoryFraction=0.3 \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 200 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1000 diff --git a/azkaban/dsp/dsp_req_install_total_orc.sh b/azkaban/dsp/dsp_req_install_total_orc.sh index 054e576..90e922b 100644 --- a/azkaban/dsp/dsp_req_install_total_orc.sh +++ b/azkaban/dsp/dsp_req_install_total_orc.sh @@ -36,7 +36,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 6g --executor-cores 5 --num-executors 100 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 4000 diff --git a/azkaban/dsp/dsp_req_pkg_total_v1.sh b/azkaban/dsp/dsp_req_pkg_total_v1.sh index 3cfe827..60b0398 100644 --- a/azkaban/dsp/dsp_req_pkg_total_v1.sh +++ b/azkaban/dsp/dsp_req_pkg_total_v1.sh @@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../${JAR} \ -date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING} diff --git a/azkaban/dsp/dsp_req_pkg_total_v2.sh b/azkaban/dsp/dsp_req_pkg_total_v2.sh index 9dbc1b8..ee47d5f 100644 --- a/azkaban/dsp/dsp_req_pkg_total_v2.sh +++ b/azkaban/dsp/dsp_req_pkg_total_v2.sh @@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 180 \ ../${JAR} -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/dsp/dsp_req_profile_total.sh b/azkaban/dsp/dsp_req_profile_total.sh index 846a5d1..4a899f6 100644 --- a/azkaban/dsp/dsp_req_profile_total.sh +++ b/azkaban/dsp/dsp_req_profile_total.sh @@ -25,8 +25,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspDeviceProfile \ --conf spark.default.parallelism=2000 \ --conf spark.kryoserializer.buffer.max=512m \ --conf spark.kryoserializer.buffer=64m \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 100 \ ../${JAR} \ -date ${LOG_TIME} -output ${OUTPUT} diff --git a/azkaban/dsp/dsp_req_unmatch_install_total_orc.sh b/azkaban/dsp/dsp_req_unmatch_install_total_orc.sh index bc32c44..98b652e 100644 --- a/azkaban/dsp/dsp_req_unmatch_install_total_orc.sh +++ b/azkaban/dsp/dsp_req_unmatch_install_total_orc.sh @@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.files.maxPartitionBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 100 diff --git a/azkaban/dsp/impre_info_compl/log_adn_dsp_click_impression_combine.sh b/azkaban/dsp/impre_info_compl/log_adn_dsp_click_impression_combine.sh index 3fb39d2..ee1dbb9 100644 --- a/azkaban/dsp/impre_info_compl/log_adn_dsp_click_impression_combine.sh +++ b/azkaban/dsp/impre_info_compl/log_adn_dsp_click_impression_combine.sh @@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspClickImpressionCombineOrc \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 8G \ diff --git a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_combine.sh b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_combine.sh index fe8a910..167036b 100644 --- a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_combine.sh +++ b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_combine.sh @@ -75,7 +75,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspImpressionHourCombine \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.yarn.executor.memoryOverhead=4096 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 10G \ diff --git a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_cn.sh b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_cn.sh index 7b4ff97..5e9435d 100644 --- a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_cn.sh +++ b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_cn.sh @@ -42,7 +42,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspImpressionHourFullCn \ --conf spark.default.parallelism=6 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.yarn.executor.memoryOverhead=1024 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 2G \ diff --git a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_tk.sh b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_tk.sh index a9c8b97..3e92612 100644 --- a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_tk.sh +++ b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_tk.sh @@ -45,7 +45,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspImpressionHourFullOther \ --conf spark.default.parallelism=2000 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.yarn.executor.memoryOverhead=2048 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 6G \ diff --git a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_vg.sh b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_vg.sh index 7581fc5..859eb98 100644 --- a/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_vg.sh +++ b/azkaban/dsp/impre_info_compl/log_adn_dsp_impression_hour_full_vg.sh @@ -42,7 +42,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspImpressionHourFullOther \ --conf spark.default.parallelism=2500 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.yarn.executor.memoryOverhead=2048 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 6G \ diff --git a/azkaban/dsp/log_adn_dsp_impression_hour_full.sh b/azkaban/dsp/log_adn_dsp_impression_hour_full.sh index a311d33..f1a53aa 100644 --- a/azkaban/dsp/log_adn_dsp_impression_hour_full.sh +++ b/azkaban/dsp/log_adn_dsp_impression_hour_full.sh @@ -39,7 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspImpressionHourFull \ --conf spark.default.parallelism=6 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.yarn.executor.memoryOverhead=1024 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 2G \ diff --git a/azkaban/dsp/tmp_extract_data_from_dsp_req.sh b/azkaban/dsp/tmp_extract_data_from_dsp_req.sh index cd13c99..771635a 100644 --- a/azkaban/dsp/tmp_extract_data_from_dsp_req.sh +++ b/azkaban/dsp/tmp_extract_data_from_dsp_req.sh @@ -21,7 +21,6 @@ hadoop fs -rm -r ${Tmp_Extract_Data_From_DspReq_Path} spark-submit --class mobvista.dmp.datasource.dsp.TmpExtractDataFromDspReq \ --conf spark.yarn.executor.memoryOverhead=3072 \ --conf spark.sql.shuffle.partitions=10000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 100 \ ../${JAR} -input $ETL_DSP_REQ_ETL_HOURS_INPUT_PATH \ -output ${Tmp_Extract_Data_From_DspReq_Path} \ diff --git a/azkaban/event_tag/Ga_purchase_event.sh b/azkaban/event_tag/Ga_purchase_event.sh index b3f20a1..3d1e2b5 100644 --- a/azkaban/event_tag/Ga_purchase_event.sh +++ b/azkaban/event_tag/Ga_purchase_event.sh @@ -51,7 +51,5 @@ spark-submit --class mobvista.dmp.datasource.event_tag.Ga_purchase_event \ --conf spark.shuffle.io.numConnectionsPerPeer=10 \ --conf spark.shuffle.consolidateFiles=true \ --conf spark.dynamicAllocation.enabled=false \ - --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --conf spark.akka.frameSize=100 --num-executors 35 --executor-cores 2 --executor-memory 5G \ ../${JAR} diff --git a/azkaban/event_tag/behavior_thirdparty_datasource_manual_daily.sh b/azkaban/event_tag/behavior_thirdparty_datasource_manual_daily.sh index 6ddb674..dca1d96 100644 --- a/azkaban/event_tag/behavior_thirdparty_datasource_manual_daily.sh +++ b/azkaban/event_tag/behavior_thirdparty_datasource_manual_daily.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.behavior.ThirdPartySourceDaily \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name behavior_from_third_party_daily --executor-memory 2g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} -outputtotal ${OUTPUT_TOTAL_PATH} -coalesce 10 \ diff --git a/azkaban/event_tag/behavior_thirdparty_datasource_total.sh b/azkaban/event_tag/behavior_thirdparty_datasource_total.sh index 345fc48..07d0e55 100644 --- a/azkaban/event_tag/behavior_thirdparty_datasource_total.sh +++ b/azkaban/event_tag/behavior_thirdparty_datasource_total.sh @@ -28,7 +28,6 @@ spark-submit --class mobvista.dmp.datasource.behavior.ThirdPartySourceTotal \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name behavior_from_third_party_total --executor-memory 2g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} -outputtotal ${OUTPUT_TOTAL_PATH} -dmpevent ${DMP_EVENT_TAG_PATH} -coalesce 10 \ diff --git a/azkaban/event_tag/event_tag.sh b/azkaban/event_tag/event_tag.sh index dffef38..be5b674 100644 --- a/azkaban/event_tag/event_tag.sh +++ b/azkaban/event_tag/event_tag.sh @@ -51,7 +51,6 @@ echo "3ss_offer_event_spec.csv file success exist, and then can start" spark-submit --class mobvista.dmp.datasource.event_tag.Event_tag \ --master yarn --deploy-mode cluster \ --conf spark.akka.frameSize=100 \ - --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.driver.memory=4G --queue root.dataplatform \ --num-executors 40 --executor-cores 4 \ diff --git a/azkaban/event_tag/event_tag_source.sh b/azkaban/event_tag/event_tag_source.sh index 448443f..ea2c24f 100644 --- a/azkaban/event_tag/event_tag_source.sh +++ b/azkaban/event_tag/event_tag_source.sh @@ -27,4 +27,4 @@ deal_time=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d") echo "$deal_time" sleep 10s -spark-submit --class mobvista.dmp.datasource.event_tag.Dmp_event_source --master yarn --deploy-mode cluster --conf spark.akka.frameSize=100 --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.driver.memory=2G --queue root.dataplatform --num-executors 35 --executor-cores 2 --executor-memory 5G --conf spark.shuffle.file.buffer=256k --conf spark.reducer.maxSizeInFlight=100m --conf spark.shuffle.io.maxRetries=60 --conf spark.shuffle.io.retryWait=60s --conf spark.network.timeout=1200 --conf spark.shuffle.compress=true --conf spark.io.compression.codec=lz4 --conf spark.driver.maxResultSize=2024M --conf spark.shuffle.spill.compress=true --name event_tag_source_daily --conf spark.app.loadTime=${deal_time} --conf spark.app.coalesce_num=500 --conf spark.yarn.executor.memoryOverhead=3048 --conf spark.locality.wait=0 --conf spark.shuffle.io.numConnectionsPerPeer=10 --conf spark.shuffle.consolidateFiles=true --conf spark.dynamicAllocation.enabled=false ../DMP.jar +spark-submit --class mobvista.dmp.datasource.event_tag.Dmp_event_source --master yarn --deploy-mode cluster --conf spark.akka.frameSize=100 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.driver.memory=2G --queue root.dataplatform --num-executors 35 --executor-cores 2 --executor-memory 5G --conf spark.shuffle.file.buffer=256k --conf spark.reducer.maxSizeInFlight=100m --conf spark.shuffle.io.maxRetries=60 --conf spark.shuffle.io.retryWait=60s --conf spark.network.timeout=1200 --conf spark.shuffle.compress=true --conf spark.io.compression.codec=lz4 --conf spark.driver.maxResultSize=2024M --conf spark.shuffle.spill.compress=true --name event_tag_source_daily --conf spark.app.loadTime=${deal_time} --conf spark.app.coalesce_num=500 --conf spark.yarn.executor.memoryOverhead=3048 --conf spark.locality.wait=0 --conf spark.shuffle.io.numConnectionsPerPeer=10 --conf spark.shuffle.consolidateFiles=true --conf spark.dynamicAllocation.enabled=false ../DMP.jar diff --git a/azkaban/facebook/facebook_daily.sh b/azkaban/facebook/facebook_daily.sh index d96061a..34759fa 100644 --- a/azkaban/facebook/facebook_daily.sh +++ b/azkaban/facebook/facebook_daily.sh @@ -48,8 +48,6 @@ spark-submit --class mobvista.dmp.datasource.facebook.FaceBookDaily \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=500 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ - --jars ${path_dir}/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name facebook_daily_kehan --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 50 \ ../${JAR} -input ${INPUT_PATH} -output ${OUTPUT_PATH} -unmatched ${UNMATCHED_OUTPUT_PATH} -coalesce 200 \ -today ${dt_today} -yesterday ${dt_yesterday} -last_sunday ${dt_yesterday} diff --git a/azkaban/facebook/facebook_device_tag.sh b/azkaban/facebook/facebook_device_tag.sh index 177fd3f..37fc96c 100644 --- a/azkaban/facebook/facebook_device_tag.sh +++ b/azkaban/facebook/facebook_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=100 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 10 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/facebook/facebook_device_tag_daily.sh b/azkaban/facebook/facebook_device_tag_daily.sh index 60713c1..06e3c87 100644 --- a/azkaban/facebook/facebook_device_tag_daily.sh +++ b/azkaban/facebook/facebook_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.facebook.FaceBookTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5 diff --git a/azkaban/facebook/facebook_device_tag_v2.sh b/azkaban/facebook/facebook_device_tag_v2.sh index 2256dab..bc8f7fe 100644 --- a/azkaban/facebook/facebook_device_tag_v2.sh +++ b/azkaban/facebook/facebook_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=4 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4 diff --git a/azkaban/facebook/facebook_install_total_orc.sh b/azkaban/facebook/facebook_install_total_orc.sh index b9b4a60..62bec89 100644 --- a/azkaban/facebook/facebook_install_total_orc.sh +++ b/azkaban/facebook/facebook_install_total_orc.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=10 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 4 diff --git a/azkaban/facebook/facebook_total.sh b/azkaban/facebook/facebook_total.sh index 5dc8835..f52f842 100644 --- a/azkaban/facebook/facebook_total.sh +++ b/azkaban/facebook/facebook_total.sh @@ -38,7 +38,6 @@ spark-submit --class mobvista.dmp.datasource.facebook.FaceBookTotal \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name facebook_device_total --executor-memory 2g --driver-memory 2g --executor-cores 2 --num-executors 3 \ ../${JAR} -outputtotal ${OUTPUT_TOTAL_PATH} -outputgender ${GENDER_OUTPUT_PATH} -coalesce 10 \ diff --git a/azkaban/fmp/fmp_insight.sh b/azkaban/fmp/fmp_insight.sh index 141bcba..7c0d0e0 100644 --- a/azkaban/fmp/fmp_insight.sh +++ b/azkaban/fmp/fmp_insight.sh @@ -28,7 +28,6 @@ spark-submit --class mobvista.dmp.datasource.fmp.FmpInsightJob \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=134217728 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 6 --num-executors 64 \ ../${JAR} \ -data ${data// /###} -output ${output_path} -coalesce 1 diff --git a/azkaban/ga/ga_device_tag_v2.sh b/azkaban/ga/ga_device_tag_v2.sh index b8142cd..5ec0db1 100644 --- a/azkaban/ga/ga_device_tag_v2.sh +++ b/azkaban/ga/ga_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=100 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 40 diff --git a/azkaban/ga/ga_install_device_tag.sh b/azkaban/ga/ga_install_device_tag.sh index e172ee3..cad42b3 100644 --- a/azkaban/ga/ga_install_device_tag.sh +++ b/azkaban/ga/ga_install_device_tag.sh @@ -32,8 +32,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=200 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 20 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 200 diff --git a/azkaban/ga/ga_install_device_tag_daily.sh b/azkaban/ga/ga_install_device_tag_daily.sh index ae4c86a..61a0ba2 100644 --- a/azkaban/ga/ga_install_device_tag_daily.sh +++ b/azkaban/ga/ga_install_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.ga.GaTagDaily \ --conf spark.sql.shuffle.partitions=15 \ --conf spark.default.parallelism=5 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/ga/ga_install_total_orc.sh b/azkaban/ga/ga_install_total_orc.sh index d5810d2..e2df49e 100644 --- a/azkaban/ga/ga_install_total_orc.sh +++ b/azkaban/ga/ga_install_total_orc.sh @@ -35,8 +35,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200 diff --git a/azkaban/ga/ga_install_total_v2.sh b/azkaban/ga/ga_install_total_v2.sh index 9357c7b..7a6f2d7 100644 --- a/azkaban/ga/ga_install_total_v2.sh +++ b/azkaban/ga/ga_install_total_v2.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 2 --num-executors 15 \ ../${JAR} -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/ga/ga_prepare.sh b/azkaban/ga/ga_prepare.sh index 9498a16..d7b2b36 100644 --- a/azkaban/ga/ga_prepare.sh +++ b/azkaban/ga/ga_prepare.sh @@ -32,7 +32,5 @@ spark-submit --class mobvista.dmp.datasource.ga.GaParser \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1.2 \ - --files /data/azkaban-hadoop/command-home/spark-offline/conf/hive-site.xml \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 180 \ ${JAR} -date_str_midline ${MID_TIME} -input ${INPUT_PATH} -output ${OUTPUT_PATH} -coalesce 2000 \ No newline at end of file diff --git a/azkaban/gender/gender_thirdparty_datasource_manual_daily.sh b/azkaban/gender/gender_thirdparty_datasource_manual_daily.sh index fb4b274..028c0f5 100644 --- a/azkaban/gender/gender_thirdparty_datasource_manual_daily.sh +++ b/azkaban/gender/gender_thirdparty_datasource_manual_daily.sh @@ -28,7 +28,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.ThirdPartySourceDaily \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name gender_from_third_party_daily --executor-memory 2g --driver-memory 2g --executor-cores 2 --num-executors 2 \ ../${JAR} -outputtotal ${OUTPUT_TOTAL_PATH} -coalesce 10 \ diff --git a/azkaban/gender/gender_thirdparty_datasource_total.sh b/azkaban/gender/gender_thirdparty_datasource_total.sh index d835191..bf194b3 100644 --- a/azkaban/gender/gender_thirdparty_datasource_total.sh +++ b/azkaban/gender/gender_thirdparty_datasource_total.sh @@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.ThirdPartySourceTotal \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=10 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \ --master yarn --deploy-mode cluster --name gender_from_third_party --executor-memory 2g --driver-memory 2g --executor-cores 2 --num-executors 3 \ ../${JAR} -outputtotal ${OUTPUT_TOTAL_PATH} -outputgender ${GENDER_OUTPUT_PATH} -coalesce 10 \ diff --git a/azkaban/gender/merge_device_gender.sh b/azkaban/gender/merge_device_gender.sh index a60b34e..252bf43 100644 --- a/azkaban/gender/merge_device_gender.sh +++ b/azkaban/gender/merge_device_gender.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.MergeDeviceGenderLR \ --conf spark.speculation=false \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1.5 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 50 \ ../${JAR} -date ${LOG_TIME} -gender_predict_input ${GENDER_PRIDICT_SCORE_INPUT} -gender_threshold_dict_input ${GDNEER_THRESHOLD_DICT_PATH} -gender_device_output ${GENDER_DEVICE_OUTPUT} -parallelism 400 diff --git a/azkaban/gender/merge_install_gender.sh b/azkaban/gender/merge_install_gender.sh index 46dde81..b67d0f5 100644 --- a/azkaban/gender/merge_install_gender.sh +++ b/azkaban/gender/merge_install_gender.sh @@ -49,7 +49,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.MergeInstallGender \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 60 \ ../${JAR} -date ${LOG_TIME} \ -ga_gender_path ${GA_GENDER_PATH} -dsp_gender_path ${DSP_GENDER_PATH} -fb_gender_path ${FB_GENDER_PATH} -tp_gender_path ${TP_GENDER_PATH} -gender_output ${GENDER_OUTPUT} -parallelism 2000 diff --git a/azkaban/gender/merge_install_gender_v2.sh b/azkaban/gender/merge_install_gender_v2.sh index b821fd7..d352235 100644 --- a/azkaban/gender/merge_install_gender_v2.sh +++ b/azkaban/gender/merge_install_gender_v2.sh @@ -81,8 +81,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.MergeInstallGenderLR \ --conf spark.shuffle.memoryFraction=0.4 \ --conf spark.sql.files.maxPartitionBytes=134217728 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar,s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/json-serde-1.3.7-jar-with-dependencies.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 5 --num-executors 100 \ ../${JAR} -date ${LOG_TIME} -dt_yesterday ${dt_yesterday} -ga_date ${GA_INSTALL_DATE} -other_date ${OTHER_DATE} \ -gender_output ${GENDER_OUTPUT} -gender_bin_output ${GENDER_BIN_OUTPUT} -org_gender_bin_output ${ORG_OUTPUT_PATH} -calc_gender_bin_output ${CALC_OUTPUT_PATH} -parallelism 1000 diff --git a/azkaban/install_ruid/etl_ruid_mapping.sh b/azkaban/install_ruid/etl_ruid_mapping.sh index fa38309..d866ca3 100644 --- a/azkaban/install_ruid/etl_ruid_mapping.sh +++ b/azkaban/install_ruid/etl_ruid_mapping.sh @@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.dm.EtlRuidMapping \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../${JAR} \ -date ${date} -output $OUTPUT_PATH diff --git a/azkaban/iqiyi/foractivation_qiyi_oppo.sh b/azkaban/iqiyi/foractivation_qiyi_oppo.sh index 03dd03b..a4a7c8b 100644 --- a/azkaban/iqiyi/foractivation_qiyi_oppo.sh +++ b/azkaban/iqiyi/foractivation_qiyi_oppo.sh @@ -21,7 +21,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.EtlOtherIqiyiActivationData \ --conf spark.sql.shuffle.partitions=2000 \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 12g --driver-memory 4g --executor-cores 5 --num-executors 80 \ ../${JAR} \ -output ${OUTPUT_PATH} \ diff --git a/azkaban/iqiyi/iqiyi_ck.sh b/azkaban/iqiyi/iqiyi_ck.sh index ad240c3..9074ad6 100644 --- a/azkaban/iqiyi/iqiyi_ck.sh +++ b/azkaban/iqiyi/iqiyi_ck.sh @@ -28,7 +28,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiEtl2CK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/iqiyi/iqiyi_daily.sh b/azkaban/iqiyi/iqiyi_daily.sh index 59dce08..7aac3be 100644 --- a/azkaban/iqiyi/iqiyi_daily.sh +++ b/azkaban/iqiyi/iqiyi_daily.sh @@ -19,7 +19,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiDaily \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files /data/hadoop-config/command-home/hdp-spark-2.3.1-offline/conf/hive-site.xml \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 10 \ ../${JAR} -date ${LOG_TIME} -partNum 10 -output ${OUTPUT_PATH} -cluster 'cluster_1st' diff --git a/azkaban/iqiyi/iqiyi_install_total_orc.sh b/azkaban/iqiyi/iqiyi_install_total_orc.sh index 5fee0c9..02feca0 100644 --- a/azkaban/iqiyi/iqiyi_install_total_orc.sh +++ b/azkaban/iqiyi/iqiyi_install_total_orc.sh @@ -29,8 +29,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=200 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 40 diff --git a/azkaban/iqiyi/iqiyi_lahuo_ck.sh b/azkaban/iqiyi/iqiyi_lahuo_ck.sh index 3e140b8..3a3ecd1 100644 --- a/azkaban/iqiyi/iqiyi_lahuo_ck.sh +++ b/azkaban/iqiyi/iqiyi_lahuo_ck.sh @@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiLaHuoCK \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf hive.exec.orc.default.stripe.size=268435456 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../${JAR} -input ${INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} diff --git a/azkaban/iqiyi/iqiyi_lahuo_daily.sh b/azkaban/iqiyi/iqiyi_lahuo_daily.sh index ac3aba3..824f722 100644 --- a/azkaban/iqiyi/iqiyi_lahuo_daily.sh +++ b/azkaban/iqiyi/iqiyi_lahuo_daily.sh @@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiLaHuoDaily \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 50 \ ../${JAR} -date ${LOG_TIME} -days 7 -output ${OUTPUT_PATH} diff --git a/azkaban/iqiyi/iqiyi_lahuo_df.sh b/azkaban/iqiyi/iqiyi_lahuo_df.sh index 55b539b..10a99df 100644 --- a/azkaban/iqiyi/iqiyi_lahuo_df.sh +++ b/azkaban/iqiyi/iqiyi_lahuo_df.sh @@ -20,7 +20,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiLaHuoDF \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files /data/hadoop-config/command-home/hdp-spark-2.3.1-offline/conf/hive-site.xml \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 10 \ ../${JAR} -date ${LOG_TIME} -partNum 10 -output ${OUTPUT_PATH} -cluster 'cluster_1st' diff --git a/azkaban/iqiyi/iqiyi_tmp_daily_data_to_dmp.sh b/azkaban/iqiyi/iqiyi_tmp_daily_data_to_dmp.sh index d34b762..7741d9b 100644 --- a/azkaban/iqiyi/iqiyi_tmp_daily_data_to_dmp.sh +++ b/azkaban/iqiyi/iqiyi_tmp_daily_data_to_dmp.sh @@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiTmpDataToDMP \ --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 30 \ ../${JAR} -input ${INPUT} \ -output ${OUTPUT} \ @@ -52,7 +51,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.IQiYiLaHuoFourDaysDataDedupli --conf spark.sql.broadcastTimeout=1200 \ --conf spark.yarn.executor.memoryOverhead=4096 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \ ../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \ -output ${FOUR_DAYS_OUTPUT} diff --git a/azkaban/joypac/joypac_device_tag.sh b/azkaban/joypac/joypac_device_tag.sh index 9558bdb..1b8d651 100644 --- a/azkaban/joypac/joypac_device_tag.sh +++ b/azkaban/joypac/joypac_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=10 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/joypac/joypac_device_tag_daily.sh b/azkaban/joypac/joypac_device_tag_daily.sh index 54c94e4..8bdad3d 100644 --- a/azkaban/joypac/joypac_device_tag_daily.sh +++ b/azkaban/joypac/joypac_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.joypac.JoypacTagDaily \ --conf spark.sql.shuffle.partitions=5 \ --conf spark.default.parallelism=5 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 3 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1 diff --git a/azkaban/joypac/joypac_device_tag_v2.sh b/azkaban/joypac/joypac_device_tag_v2.sh index c1b882f..bb82aee 100644 --- a/azkaban/joypac/joypac_device_tag_v2.sh +++ b/azkaban/joypac/joypac_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=8 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4 diff --git a/azkaban/joypac/joypac_install_total_orc.sh b/azkaban/joypac/joypac_install_total_orc.sh index 26ba73d..ac15735 100644 --- a/azkaban/joypac/joypac_install_total_orc.sh +++ b/azkaban/joypac/joypac_install_total_orc.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=10 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 4 diff --git a/azkaban/joypac/joypac_result_all.sh b/azkaban/joypac/joypac_result_all.sh index 031582b..edc8ff6 100644 --- a/azkaban/joypac/joypac_result_all.sh +++ b/azkaban/joypac/joypac_result_all.sh @@ -22,8 +22,6 @@ spark-submit --class mobvista.dmp.datasource.joypac.JoypacResultAll \ --conf spark.default.parallelism=20 \ --master yarn --deploy-mode cluster --name JoypacResutlAll \ --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ ../${JAR} -output ${OUTPUT_PATH} -coalesce 10 -date ${LOG_TIME} if [[ $? -ne 0 ]];then diff --git a/azkaban/joypac/joypac_result_daily.sh b/azkaban/joypac/joypac_result_daily.sh index b69987c..4c57d69 100644 --- a/azkaban/joypac/joypac_result_daily.sh +++ b/azkaban/joypac/joypac_result_daily.sh @@ -24,8 +24,6 @@ spark-submit --class mobvista.dmp.datasource.joypac.JoypacResultDaily \ --conf spark.default.parallelism=20 \ --master yarn --deploy-mode cluster --name JoypacResutlDaily \ --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ ../${JAR} -dict ${DICT_PATH} -output ${OUTPUT_PATH} -coalesce 10 -date ${LOG_TIME} -cluster ${cluster} if [[ $? -ne 0 ]];then diff --git a/azkaban/joypac/joypac_result_etl.sh b/azkaban/joypac/joypac_result_etl.sh index 1891248..cd68919 100644 --- a/azkaban/joypac/joypac_result_etl.sh +++ b/azkaban/joypac/joypac_result_etl.sh @@ -30,7 +30,6 @@ spark-submit --class mobvista.dmp.datasource.joypac.JoypacResultEtl \ --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ --master yarn --deploy-mode cluster --name JoypacResutlEtl \ --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ - --jars ${JARS} \ ../${JAR} -input ${INPUT_PATH} -output ${OUTPUT_PATH} -coalesce 10 -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} if [[ $? -ne 0 ]];then diff --git a/azkaban/joypac/joypac_user_info.sh b/azkaban/joypac/joypac_user_info.sh index 0318630..0d97fef 100644 --- a/azkaban/joypac/joypac_user_info.sh +++ b/azkaban/joypac/joypac_user_info.sh @@ -21,8 +21,6 @@ spark-submit --class mobvista.dmp.datasource.joypac.JoypacUserFeatureJob \ --conf spark.kryoserializer.buffer.max=512m \ --conf spark.kryoserializer.buffer=64m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 100 \ ../${JAR} -date ${LOG_TIME} -output ${OUTPUT_PATH} -dict_output ${DICT_PATH} -coalesce 200 diff --git a/azkaban/joypac/joypac_user_info_cassandra.sh b/azkaban/joypac/joypac_user_info_cassandra.sh index 6af876b..b117db5 100644 --- a/azkaban/joypac/joypac_user_info_cassandra.sh +++ b/azkaban/joypac/joypac_user_info_cassandra.sh @@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.UserFeatureCassandra \ --conf spark.default.parallelism=1000 \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 3 \ ../${JAR} -input ${INPUT_PATH} -region ${REGION} diff --git a/azkaban/mp/mp_request_daily.sh b/azkaban/mp/mp_request_daily.sh index d7c7abf..968ea87 100644 --- a/azkaban/mp/mp_request_daily.sh +++ b/azkaban/mp/mp_request_daily.sh @@ -23,7 +23,6 @@ export HADOOP_CLIENT_OPTS="-Xmx4096m $HADOOP_CLIENT_OPTS" spark-submit --class mobvista.dmp.datasource.mpsdk.ParseMPSDKDaily \ --conf spark.sql.shuffle.partitions=200 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 20 \ ../${JAR} -input $INPUT_PATH -output $OUTPUT_PATH -mappingPath $ID_MAPPING_PATH -parallelism 100 -coalesce 20 diff --git a/azkaban/mp/mp_request_device_tag.sh b/azkaban/mp/mp_request_device_tag.sh index e538e9e..6651aef 100644 --- a/azkaban/mp/mp_request_device_tag.sh +++ b/azkaban/mp/mp_request_device_tag.sh @@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.sql.shuffle.partitions=1000 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 50 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1000 diff --git a/azkaban/mp/mp_request_device_tag_daily.sh b/azkaban/mp/mp_request_device_tag_daily.sh index ac616ef..67f573a 100644 --- a/azkaban/mp/mp_request_device_tag_daily.sh +++ b/azkaban/mp/mp_request_device_tag_daily.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.mpsdk.MpSdkTagDaily \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10 diff --git a/azkaban/mp/mp_request_device_tag_v2.sh b/azkaban/mp/mp_request_device_tag_v2.sh index 8419078..dc03e6e 100644 --- a/azkaban/mp/mp_request_device_tag_v2.sh +++ b/azkaban/mp/mp_request_device_tag_v2.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \ --conf spark.default.parallelism=200 \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100 diff --git a/azkaban/mp/mp_request_install_list_v2.sh b/azkaban/mp/mp_request_install_list_v2.sh index 77e63ec..9cfc37f 100644 --- a/azkaban/mp/mp_request_install_list_v2.sh +++ b/azkaban/mp/mp_request_install_list_v2.sh @@ -35,8 +35,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.8 \ --conf spark.speculation.multiplier=1.2 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \ ../${JAR} -output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business} diff --git a/azkaban/mp/mp_request_install_total_orc.sh b/azkaban/mp/mp_request_install_total_orc.sh index 63a3a92..86c647d 100644 --- a/azkaban/mp/mp_request_install_total_orc.sh +++ b/azkaban/mp/mp_request_install_total_orc.sh @@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=400 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 25 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200 diff --git a/azkaban/mparticle/mparticle_install_total_orc.sh b/azkaban/mparticle/mparticle_install_total_orc.sh index a07ff87..a8e5ac1 100644 --- a/azkaban/mparticle/mparticle_install_total_orc.sh +++ b/azkaban/mparticle/mparticle_install_total_orc.sh @@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \ --conf spark.default.parallelism=200 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \ ../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 40 diff --git a/azkaban/output/reyun/user_info.sh b/azkaban/output/reyun/user_info.sh index 5753522..d5a6d05 100644 --- a/azkaban/output/reyun/user_info.sh +++ b/azkaban/output/reyun/user_info.sh @@ -7,9 +7,9 @@ source ../../dmp_env.sh dt=$(date +"%Y%m%d" -d "-1 day $ScheduleTime") -date_path=$(date +"%Y%m/%d" -d "-1 day $ScheduleTime") +date_path=$(date +"%Y/%m/%d" -d "-1 day $ScheduleTime") -check_await ${ODS_DMP_USER_INFO_ALL}/${dt}/_SUCCESS +check_await ${ODS_DMP_USER_INFO_ALL}_v2/${dt}/_SUCCESS OUTPUT_PATH="${OUTPUT_REYUN_USER_INFO_PATH}/${date_path}" diff --git a/azkaban/package/get_package.sh b/azkaban/package/get_package.sh index 3340f59..a1a1bab 100644 --- a/azkaban/package/get_package.sh +++ b/azkaban/package/get_package.sh @@ -22,8 +22,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.GetPackage \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 5 --num-executors 128 \ ../${JAR} \ -date ${date} diff --git a/azkaban/package/merge_pkg_tag.sh b/azkaban/package/merge_pkg_tag.sh index fe904bd..82d953c 100755 --- a/azkaban/package/merge_pkg_tag.sh +++ b/azkaban/package/merge_pkg_tag.sh @@ -24,8 +24,6 @@ spark-submit --class mobvista.dmp.datasource.apptag.MergeAppTagID \ --conf spark.kryoserializer.buffer.max=512m \ --conf spark.kryoserializer.buffer=64m \ --conf dfs.socket.timeout=300000 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors 2 \ ../${JAR} -output ${output_path} -date ${date} diff --git a/azkaban/package/package_mapping.sh b/azkaban/package/package_mapping.sh index 365ceaa..2987c2e 100644 --- a/azkaban/package/package_mapping.sh +++ b/azkaban/package/package_mapping.sh @@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.PackageMapping \ --conf spark.sql.shuffle.partitions=10 \ --conf spark.default.parallelism=10 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 1 --num-executors 1 \ ../${JAR} \ -output ${mount_path} diff --git a/azkaban/realtime/cassandra_sink.sh b/azkaban/realtime/cassandra_sink.sh index 36fb174..322729a 100644 --- a/azkaban/realtime/cassandra_sink.sh +++ b/azkaban/realtime/cassandra_sink.sh @@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.UserFeatureCassandra \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 2g --driver-memory 2g --executor-cores 2 --num-executors ${cores} \ ../${JAR} \ -date ${date} -region ${region} -input ${ODS_USER_INFO_REGION_PATH} diff --git a/azkaban/realtime/dm_realtime_service.sh b/azkaban/realtime/dm_realtime_service.sh index aa17ba0..20671a4 100644 --- a/azkaban/realtime/dm_realtime_service.sh +++ b/azkaban/realtime/dm_realtime_service.sh @@ -37,7 +37,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.DeviceInfoJob \ --conf spark.sql.files.maxPartitionBytes=536870912 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 8g --executor-cores 3 --num-executors 100 \ ../${JAR} \ -date ${date} -output ${output_path} -coalesce 2000 diff --git a/azkaban/realtime/dm_realtime_service_region.sh b/azkaban/realtime/dm_realtime_service_region.sh index 198a9f8..6a183af 100644 --- a/azkaban/realtime/dm_realtime_service_region.sh +++ b/azkaban/realtime/dm_realtime_service_region.sh @@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.UserFeatureFilterJob \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 100 \ ../${JAR} \ -date ${date} -output ${output_path} -coalesce 1000 -days ${days} diff --git a/azkaban/realtime/dmp_protal_job.sh b/azkaban/realtime/dmp_protal_job.sh index 14d4415..72e50cb 100644 --- a/azkaban/realtime/dmp_protal_job.sh +++ b/azkaban/realtime/dmp_protal_job.sh @@ -13,7 +13,6 @@ spark-submit --class mobvista.dmp.datasource.retargeting.RetargetingCassandra \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 2g --driver-memory 2g --executor-cores 2 --num-executors 2 \ ../${JAR} -region ${region} diff --git a/azkaban/realtime_v2/adn_etl_hour.sh b/azkaban/realtime_v2/adn_etl_hour.sh index 415e9c9..f24418b 100644 --- a/azkaban/realtime_v2/adn_etl_hour.sh +++ b/azkaban/realtime_v2/adn_etl_hour.sh @@ -47,7 +47,6 @@ spark-submit --class mobvista.dmp.datasource.adn.AdnRequestSdkHour \ --conf spark.sql.shuffle.partitions=${partition} \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.sql.files.maxPartitionBytes=134217728 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores ${cores} --num-executors ${executors} \ ../${JAR} -input ${INPUT_ADN_PATH} -output ${OUTPUT_PATH} -coalesce ${coalesce} -input_dict1 ${INPUT_MAPPING_PATH} -input_dict2 ${APPID_PACKAGE} diff --git a/azkaban/realtime_v2/dsp_etl_hour.sh b/azkaban/realtime_v2/dsp_etl_hour.sh index 14de7a4..69e2fc4 100644 --- a/azkaban/realtime_v2/dsp_etl_hour.sh +++ b/azkaban/realtime_v2/dsp_etl_hour.sh @@ -57,7 +57,6 @@ spark-submit --class mobvista.dmp.datasource.dsp.DspEtlHour \ --conf spark.sql.files.maxPartitionBytes=134217728 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores ${cores} --num-executors ${executors} \ ../${JAR} -input ${INPUT_DSP_PATH} -output ${OUTPUT_PATH} -detailOutPath ${detailOutPath} -coalesce ${coalesce} diff --git a/azkaban/realtime_v2/etl_hour_ck.sh b/azkaban/realtime_v2/etl_hour_ck.sh index 52491d9..b68a9bf 100644 --- a/azkaban/realtime_v2/etl_hour_ck.sh +++ b/azkaban/realtime_v2/etl_hour_ck.sh @@ -51,7 +51,6 @@ spark-submit --class mobvista.dmp.clickhouse.realtime.MergeEtlHourToCK \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors ${executors} \ ../${JAR} -date ${date} -host ${host} -cluster ${cluster} -database ${database} -table ${table} \ --input_dsp ${INPUT_DSP_PATH} --input_adn ${INPUT_ADN_PATH} --region ${region} --hour ${hour} --app_tag_input ${INPUT_APP_TAG_PATH} diff --git a/azkaban/realtime_v2/realtime_hour_cassandra_sink.sh b/azkaban/realtime_v2/realtime_hour_cassandra_sink.sh index f347e7a..24759c3 100644 --- a/azkaban/realtime_v2/realtime_hour_cassandra_sink.sh +++ b/azkaban/realtime_v2/realtime_hour_cassandra_sink.sh @@ -36,7 +36,6 @@ spark-submit --class mobvista.dmp.clickhouse.realtime.ReadFromCKWriteCS \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 2g --executor-cores 2 --num-executors ${cores} \ ../${JAR} \ -date ${date} -hour ${hour} -region ${region} -host ${host} -cluster ${cluster} -database ${database} -table ${table} --output ${S3_OUTPUT} -part ${partition} diff --git a/azkaban/retarget_dealerid/dsp_dealer_daily.sh b/azkaban/retarget_dealerid/dsp_dealer_daily.sh index a66da1c..5bb9e47 100644 --- a/azkaban/retarget_dealerid/dsp_dealer_daily.sh +++ b/azkaban/retarget_dealerid/dsp_dealer_daily.sh @@ -13,7 +13,6 @@ hadoop fs -rm -r $ETL_DSP_DEALERID_RETARGET_DAILY_PATH spark-submit --class mobvista.dmp.datasource.dsp.DspDealeridRetarget \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=2000 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \ ../${JAR} -yyyymmdd $dt -output $ETL_DSP_DEALERID_RETARGET_DAILY_PATH -appFile $ETL_DSP_DEALERID_PKG_MAPPING -coalesce 2000 || exit 1 diff --git a/azkaban/retarget_dealerid/dsp_dealer_device_tag.sh b/azkaban/retarget_dealerid/dsp_dealer_device_tag.sh index 791dfe9..afedf63 100644 --- a/azkaban/retarget_dealerid/dsp_dealer_device_tag.sh +++ b/azkaban/retarget_dealerid/dsp_dealer_device_tag.sh @@ -28,8 +28,6 @@ hadoop fs -rmr ${store_output_path} spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=1000 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 5 --num-executors 40 \ ../${JAR} \ -date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1000 diff --git a/azkaban/rtdmp/device_info_calc.sh b/azkaban/rtdmp/device_info_calc.sh index c25f7d9..e11e889 100644 --- a/azkaban/rtdmp/device_info_calc.sh +++ b/azkaban/rtdmp/device_info_calc.sh @@ -20,7 +20,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.DeviceInfoCalc \ --conf spark.default.parallelism=4000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors 100 \ ../${JAR} -date ${date} -output ${OUTPUT} diff --git a/azkaban/rtdmp/device_region_calc.sh b/azkaban/rtdmp/device_region_calc.sh index bee79c4..3f8c627 100644 --- a/azkaban/rtdmp/device_region_calc.sh +++ b/azkaban/rtdmp/device_region_calc.sh @@ -24,8 +24,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.DeviceRegionCalc \ --conf spark.default.parallelism=200 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${date} -output ${OUTPUT} diff --git a/azkaban/rtdmp/device_region_merge.sh b/azkaban/rtdmp/device_region_merge.sh index 4aafe27..a6c4993 100644 --- a/azkaban/rtdmp/device_region_merge.sh +++ b/azkaban/rtdmp/device_region_merge.sh @@ -18,8 +18,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.DeviceRegionMerge \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${date} -output ${OUTPUT} -input ${INPUT} -coalesce 200 diff --git a/azkaban/rtdmp/device_region_result.sh b/azkaban/rtdmp/device_region_result.sh index f0ee97a..6703d67 100644 --- a/azkaban/rtdmp/device_region_result.sh +++ b/azkaban/rtdmp/device_region_result.sh @@ -15,8 +15,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.DeviceRegionResult \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \ ../${JAR} -date ${date} -output ${OUTPUT} diff --git a/azkaban/rtdmp/device_region_write.sh b/azkaban/rtdmp/device_region_write.sh index 97e4787..1feaeaa 100644 --- a/azkaban/rtdmp/device_region_write.sh +++ b/azkaban/rtdmp/device_region_write.sh @@ -16,7 +16,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.DeviceRegionWrite \ --conf spark.default.parallelism=1000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 4 \ ../${JAR} -input ${INPUT} diff --git a/azkaban/rtdmp/lazada/etl_job.sh b/azkaban/rtdmp/lazada/etl_job.sh index 254fb31..7f93f87 100644 --- a/azkaban/rtdmp/lazada/etl_job.sh +++ b/azkaban/rtdmp/lazada/etl_job.sh @@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.lazada.ETLJob \ --conf spark.default.parallelism=100 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../../${JAR} -dt ${date} -output ${OUTPUT} -tb_type ${tb_type} diff --git a/azkaban/rtdmp/lazada/lazada_rtdmp.sh b/azkaban/rtdmp/lazada/lazada_rtdmp.sh index ba79877..cb3e7c2 100644 --- a/azkaban/rtdmp/lazada/lazada_rtdmp.sh +++ b/azkaban/rtdmp/lazada/lazada_rtdmp.sh @@ -16,7 +16,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.lazada.ProcessRTJob \ --conf spark.default.parallelism=4000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../../${JAR} -dt ${date} -output ${OUTPUT} diff --git a/azkaban/rtdmp/lazada/merge_install.sh b/azkaban/rtdmp/lazada/merge_install.sh index cf40fda..fe51a2c 100644 --- a/azkaban/rtdmp/lazada/merge_install.sh +++ b/azkaban/rtdmp/lazada/merge_install.sh @@ -22,7 +22,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.lazada.MergeInstallJob \ --conf spark.default.parallelism=4000 \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors 5 \ ../../${JAR} -dt ${date} -output ${OUTPUT} diff --git a/azkaban/rtdmp/rtdmp_as.sh b/azkaban/rtdmp/rtdmp_as.sh index 891127c..4e2a922 100644 --- a/azkaban/rtdmp/rtdmp_as.sh +++ b/azkaban/rtdmp/rtdmp_as.sh @@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpAS \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1.3 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \ ../${JAR} -input_audience ${INPUT_AUDIENCE} -input_data ${INPUT_DATA} -output ${OUTPUT} -coalesce 100 -time "${date_time}" diff --git a/azkaban/rtdmp/rtdmp_merge.sh b/azkaban/rtdmp/rtdmp_merge.sh index b3fb8d7..492015d 100644 --- a/azkaban/rtdmp/rtdmp_merge.sh +++ b/azkaban/rtdmp/rtdmp_merge.sh @@ -49,8 +49,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMerge \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1.3 \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 6g --executor-cores 5 --num-executors 20 \ ../${JAR} -date_time "${curr_time}" -old_time "${old_time}" -input ${INPUT} -output ${OUTPUT} -partition 100 diff --git a/azkaban/rtdmp/rtdmp_repair.sh b/azkaban/rtdmp/rtdmp_repair.sh index c300ffd..cad5cd0 100644 --- a/azkaban/rtdmp/rtdmp_repair.sh +++ b/azkaban/rtdmp/rtdmp_repair.sh @@ -21,7 +21,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMain \ --conf spark.speculation=true \ --conf spark.speculation.quantile=0.9 \ --conf spark.speculation.multiplier=1.3 \ - --jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 3 --num-executors 20 \ ../${JAR} -time "${date_time}" -data_utime "${date_time}" -output ${OUTPUT} -coalesce 100 -partition 2000 diff --git a/azkaban/rtdmp/rtdmp_request.sh b/azkaban/rtdmp/rtdmp_request.sh index cebfa88..66f7f5f 100644 --- a/azkaban/rtdmp/rtdmp_request.sh +++ b/azkaban/rtdmp/rtdmp_request.sh @@ -96,8 +96,6 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpRequest \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors ${executors} \ ../${JAR} -date "${date}" -hh "0${hh}" -output ${OUTPUT} -coalesce ${coalesce} -business ${business} -table ${table} diff --git a/azkaban/rtdmp/rtdmp_request_iqiyi_adx.sh b/azkaban/rtdmp/rtdmp_request_iqiyi_adx.sh index ea465ae..dbfc03a 100644 --- a/azkaban/rtdmp/rtdmp_request_iqiyi_adx.sh +++ b/azkaban/rtdmp/rtdmp_request_iqiyi_adx.sh @@ -25,8 +25,6 @@ spark-submit --class mobvista.dmp.datasource.iqiyi.RTDmpIQiYiRequest \ --conf spark.kryoserializer.buffer.max=256m \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ - --jars ${JARS} \ --master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 100 \ ../${JAR} -date ${date} -output ${OUTPUT} diff --git a/azkaban/setting/appid_package.sh b/azkaban/setting/appid_package.sh index d11f8f0..374624b 100644 --- a/azkaban/setting/appid_package.sh +++ b/azkaban/setting/appid_package.sh @@ -37,8 +37,6 @@ spark-submit --class mobvista.dmp.datasource.setting.SettingTotal \ --conf spark.network.timeout=720s \ --conf spark.default.parallelism=30 \ --conf spark.sql.autoBroadcastJoinThreshold=31457280 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --name apps_flyer_total --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \ ../${JAR} -outputtotal ${APP_ID_MAPPING_TMP} \ -coalesce 30 \ diff --git a/azkaban/statistics/dm_device_tag_statistics.sh b/azkaban/statistics/dm_device_tag_statistics.sh index 1fdbb8a..5160c30 100644 --- a/azkaban/statistics/dm_device_tag_statistics.sh +++ b/azkaban/statistics/dm_device_tag_statistics.sh @@ -35,7 +35,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmDeviceTagStatistics \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 60 \ ../${JAR} \ -output ${mount_path} -date ${date} -coalesce 500 diff --git a/azkaban/toutiao/dm_toutiao_launch_total.sh b/azkaban/toutiao/dm_toutiao_launch_total.sh index cba16a9..c4ab9b8 100644 --- a/azkaban/toutiao/dm_toutiao_launch_total.sh +++ b/azkaban/toutiao/dm_toutiao_launch_total.sh @@ -25,7 +25,6 @@ hadoop fs -rm -r ${OUTPUT_PATH} spark-submit --class mobvista.dmp.datasource.toutiao.DmToutiaoTotal \ --conf spark.sql.shuffle.partitions=${reduce_num} \ --conf spark.default.parallelism=200 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 50 \ ../${JAR} -output $OUTPUT_PATH -date ${dt} -yestoday ${old_date} -updateDate ${update_date} if [ $? -ne 0 ];then diff --git a/azkaban/toutiao/toutiao_dmp_device_tag.sh b/azkaban/toutiao/toutiao_dmp_device_tag.sh index 9714515..c6b8590 100644 --- a/azkaban/toutiao/toutiao_dmp_device_tag.sh +++ b/azkaban/toutiao/toutiao_dmp_device_tag.sh @@ -21,8 +21,6 @@ spark-submit --class mobvista.dmp.datasource.toutiao.GameDeviceToutiao \ --conf spark.yarn.executor.memoryOverhead=2048 \ --conf spark.sql.shuffle.partitions=4000 \ --conf spark.default.parallelism=300 \ - --files ${HIVE_SITE_PATH} \ - --jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \ --master yarn --deploy-mode cluster --executor-memory 12g --driver-memory 6g --executor-cores 4 --num-executors 120 \ ${JAR} \ -date ${date} -output ${output_path} -startDate ${startDate} -endDate ${endDate} -file ${file} diff --git a/azkaban/toutiao/toutiao_launch_device_tag.sh b/azkaban/toutiao/toutiao_launch_device_tag.sh index ba9562b..c4824ff 100644 --- a/azkaban/toutiao/toutiao_launch_device_tag.sh +++ b/azkaban/toutiao/toutiao_launch_device_tag.sh @@ -31,7 +31,6 @@ hadoop fs -rm -r ${output_path} spark-submit --class mobvista.dmp.datasource.toutiao.DmToutiaoDeviceTag \ --conf spark.sql.shuffle.partitions=${reduce_num} \ --conf spark.default.parallelism=200 \ - --files ${HIVE_SITE_PATH} \ --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 50 \ ../${JAR} -date "$dt" -output ${output_path} if [ $? -ne 0 ];then diff --git a/azkaban/userinfo/ods_dmp_user_info_all.sh b/azkaban/userinfo/ods_dmp_user_info_all.sh index 25cde2c..97710ae 100644 --- a/azkaban/userinfo/ods_dmp_user_info_all.sh +++ b/azkaban/userinfo/ods_dmp_user_info_all.sh @@ -98,7 +98,6 @@ spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoAll \ --conf spark.sql.files.maxPartitionBytes=268435456 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 10G \ diff --git a/azkaban/userinfo/ods_dmp_user_info_all_v2.sh b/azkaban/userinfo/ods_dmp_user_info_all_v2.sh index f742778..b838073 100644 --- a/azkaban/userinfo/ods_dmp_user_info_all_v2.sh +++ b/azkaban/userinfo/ods_dmp_user_info_all_v2.sh @@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoAllV2 \ --conf spark.shuffle.memoryFraction=0.4 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=268435456 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 12G \ diff --git a/azkaban/userinfo/ods_dmp_user_info_daily.sh b/azkaban/userinfo/ods_dmp_user_info_daily.sh index a1783bc..33071fb 100644 --- a/azkaban/userinfo/ods_dmp_user_info_daily.sh +++ b/azkaban/userinfo/ods_dmp_user_info_daily.sh @@ -121,7 +121,6 @@ spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoDailyV2 \ --conf spark.sql.adaptive.enabled=true \ --conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \ --conf spark.sql.autoBroadcastJoinThreshold=-1 \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 12G \ diff --git a/azkaban/userinfo/ods_dmp_user_info_daily_v2.sh b/azkaban/userinfo/ods_dmp_user_info_daily_v2.sh index 5d1e42e..7899e3d 100644 --- a/azkaban/userinfo/ods_dmp_user_info_daily_v2.sh +++ b/azkaban/userinfo/ods_dmp_user_info_daily_v2.sh @@ -116,14 +116,12 @@ spark-submit --class mobvista.dmp.datasource.device.OdsDmpUserInfoDailyV3 \ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \ --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \ - --files ${HIVE_SITE_PATH} \ --master yarn \ --deploy-mode cluster \ --executor-memory 10G \ --driver-memory 6G \ --executor-cores 4 \ --num-executors 180 \ - --jars ${JARS} \ ../${JAR} \ -cur_day ${date} -gender_date ${GET_GENDER_DATE} -output ${OUTPUT_PATH} -coalesce 2000 diff --git a/src/main/scala/mobvista/dmp/output/reyun/Constant.scala b/src/main/scala/mobvista/dmp/output/reyun/Constant.scala index 6f25dfb..3c40ddd 100644 --- a/src/main/scala/mobvista/dmp/output/reyun/Constant.scala +++ b/src/main/scala/mobvista/dmp/output/reyun/Constant.scala @@ -37,7 +37,7 @@ object Constant { val user_info = """ |SELECT dev_id, dev_id_md5, LOWER(dev_type) dev_type, LOWER(platform) platform, UPPER(country) country, install, interest, update_date - | FROM dwh.ods_dmp_user_info_all + | FROM dwh.ods_dmp_user_info_all_v2 | WHERE dt = '@date' AND UPPER(country) = 'CN' |""".stripMargin } -- libgit2 0.27.1