Commit 911afe9a by WangJinfeng

Remove --files and --jars; upgrade to Spark 3.1.1

parent 3c38857b
......@@ -32,8 +32,6 @@ hadoop fs -rmr $store_output_path
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=20 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 4 \
../${JAR} \
-date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.tracking_3s.TrackingTagDaily \
--conf spark.sql.shuffle.partitions=5 \
--conf spark.default.parallelism=5 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 3 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=8 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4
......
......@@ -38,8 +38,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=20 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 10
......
......@@ -40,7 +40,6 @@ spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackDaily \
--conf spark.sql.shuffle.partitions=1000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 100 \
../${JAR} -output ${OUTPUT_PATH} -coalesce 100 \
......
......@@ -28,8 +28,6 @@ hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.yarn.executor.memoryOverhead=2048 \
--conf spark.sql.shuffle.partitions=1000 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 40 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1000
......
......@@ -28,8 +28,6 @@ spark-submit --class mobvista.dmp.datasource.postback_3s.PostBackTagDaily \
--conf spark.sql.shuffle.partitions=10 \
--conf spark.default.parallelism=10 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=100 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 40
......
......@@ -38,7 +38,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 25 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200
......
......@@ -48,7 +48,6 @@ spark-submit --class mobvista.dmp.datasource.packagelist.MergeCampaignList \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=2048 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 4 \
../${JAR} -input_dmp_data_adn ${INPUT_DMP_DATA_ADN} -input_campaign_adn ${INPUT_CAMPAIGN_ADN} -coalesce 20 \
-output ${CAMPAIGN_TAG_OUTPUT} -today ${yester_year}-${yester_month}-${yester_day} || exit 1
......
......@@ -24,7 +24,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region frankfurt || exit 1
......
......@@ -24,7 +24,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region seoul || exit 1
......
......@@ -23,7 +23,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 50 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 400 -region singapore || exit 1
......
......@@ -24,7 +24,6 @@ hadoop fs -rm -r $ETL_ADN_REQ_ORG_HOURS_PATH
spark-submit --class mobvista.dmp.datasource.adn.AdnOrgLogEtlHours \
--conf spark.yarn.executor.memoryOverhead=3072 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 20 \
../../${JAR} -datetime "$yt$mt$dt$hhpath" -output $ETL_ADN_REQ_ORG_HOURS_PATH -coalesce 200 -region virginia || exit 1
......
......@@ -32,8 +32,6 @@ hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=20 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 5 \
../../${JAR} \
-date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn.AdnInstallTagDaily \
--conf spark.sql.shuffle.partitions=50 \
--conf spark.default.parallelism=10 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 4 --num-executors 5 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 1
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=8 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 4
......
......@@ -32,8 +32,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.sql.shuffle.partitions=400 \
--conf spark.default.parallelism=400 \
--conf spark.kryoserializer.buffer.max=256m \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 5 \
../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 100
......
......@@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \
--conf spark.kryoserializer.buffer=64m \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 40 \
../../${JAR} \
-date ${date} -coalesce 200 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING}
......
......@@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.8 \
--conf spark.speculation.multiplier=1.2 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 10 \
../../${JAR} \
-output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}
......
......@@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.yarn.executor.memoryOverhead=2048 \
--conf spark.sql.shuffle.partitions=4000 \
--conf spark.default.parallelism=4000 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 100 \
../../${JAR} \
-date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 400
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn.AdnRequestSdkTagDaily \
--conf spark.sql.shuffle.partitions=20 \
--conf spark.default.parallelism=5 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 2 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 20
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=200 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 10 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100
......
......@@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.default.parallelism=2000 \
--conf spark.kryoserializer.buffer.max=256m \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 18g --driver-memory 4g --executor-cores 5 --num-executors 60 \
../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 1000
......
......@@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.datasource.adn_request_other.EtlAdnRequestOthe
--conf spark.storage.memoryFraction=0.4 \
--conf spark.driver.maxResultSize=5g \
--conf spark.executor.extraJavaOptions=-XX:+UseG1GC \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 5 --num-executors 40 \
../../${JAR} -input "${INPUT_PATH}/*/*" -output $OUTPUT_PATH
if [ $? -ne 0 ]; then
......
......@@ -33,8 +33,6 @@ hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.yarn.executor.memoryOverhead=3072 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 80 \
../../${JAR} \
-date $date -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2000
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn_request_other.AdnRequestOtherTa
--conf spark.sql.shuffle.partitions=20 \
--conf spark.default.parallelism=10 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=200 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \
../../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100
......
......@@ -27,7 +27,6 @@ hadoop fs -rm -r "$OUTPUT_PATH"
spark-submit --class mobvista.dmp.datasource.adn_request_other.AdnRequestOtherInstall \
--conf spark.yarn.executor.memoryOverhead=2048 \
--conf spark.sql.shuffle.partitions=2000 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 200 \
../../${JAR} \
-input "${INPUT_PATH}" -output $OUTPUT_PATH -date $date -oldInput $OLD_INPUT_PATH -parallelism 2000 -coalesce 2000
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \
../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 400
......
......@@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \
--conf spark.kryoserializer.buffer=64m \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \
../../${JAR} \
-date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING}
......
......@@ -38,8 +38,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.8 \
--conf spark.speculation.multiplier=1.2 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 30 \
../../${JAR} \
-output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}
......
......@@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \
--conf spark.kryoserializer.buffer=64m \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \
../../${JAR} \
-date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING}
......
......@@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.8 \
--conf spark.speculation.multiplier=1.2 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 2 --num-executors 40 \
../../${JAR} \
-output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}
......
......@@ -30,8 +30,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=1000 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC " \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 4 --num-executors 50 \
../../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200
......
......@@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.adn_adx.AdnAdxDeviceTag \
--conf spark.network.timeout=720s \
--conf spark.default.parallelism=10 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar \
--master yarn --deploy-mode cluster --name AdnAdxDeviceTag --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 32 \
../${JAR} -outputadxdevtag ${OUTPUT_ADN_ADX_DEVICE_TAG_PATH} \
......
......@@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.adn_adx.AdnTecentAdxDataMidWay \
--conf spark.default.parallelism=1000 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/hive-hcatalog-core-2.3.3.jar,s3://mob-emr-test/dataplatform/DataWareHouse/offline/myjar/json-serde-1.3.7-jar-with-dependencies.jar \
--master yarn --deploy-mode cluster --name AdnTecentAdxDataMidWay --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 100 \
../${JAR} -outputadxtmp ${OUTPUT_ODS_ADX_TMP_PATH} -dimadxpkg ${OUTPUT_DIM_ADN_ADX_PKG_PATH} \
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn_sdk.AdnSdkDaily \
--conf spark.driver.maxResultSize=8g \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--conf spark.app.coalesce=60000 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --name adn_sdk_daily --executor-memory 8g --driver-memory 6g --executor-cores 4 --num-executors 200 \
../${JAR}
......
......@@ -34,8 +34,6 @@ hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 3 --num-executors 200 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2000
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.datasource.adn_sdk.AdnSdkTagDaily \
--conf spark.sql.shuffle.partitions=10 \
--conf spark.default.parallelism=10 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 10
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=1000 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=4000 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 100 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 2000
......
......@@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \
--conf spark.kryoserializer.buffer=64m \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 80 \
../${JAR} \
-date ${date} -coalesce 1000 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING}
......
......@@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.8 \
--conf spark.speculation.multiplier=1.2 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \
../${JAR} \
-output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}
......
......@@ -34,8 +34,6 @@ hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 6g --executor-cores 4 --num-executors 150 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 2000
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=100 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 20 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 100
......
......@@ -33,8 +33,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=500 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 200
......
......@@ -31,7 +31,6 @@ spark-submit --class mobvista.dmp.datasource.dm.FixInstallListRuid \
--conf spark.kryoserializer.buffer=64m \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 40 \
../${JAR} \
-date ${date} -coalesce 200 -output $OUTPUT_PATH -business ${BUSINESS} -input ${INPUT_MAPPING}
......
......@@ -43,8 +43,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.8 \
--conf spark.speculation.multiplier=1.2 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \
../${JAR} \
-output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}
......
......@@ -36,8 +36,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.GetAgeGender \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.default.parallelism=2000 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 5 --num-executors 60 \
../${JAR} -ageOutput ${AGE_OUTPUT_PATH} -genderOutput ${GENDER_OUTPUT_PATH} -date ${LOG_TIME} -business ${business}
......
......@@ -58,8 +58,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.GetAgeGender \
--conf spark.speculation.quantile=0.9 \
--conf spark.speculation.multiplier=1.5 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 50 \
../${JAR} -ageOutput ${AGE_OUTPUT_PATH} -genderOutput ${GENDER_OUTPUT_PATH} -date ${GA_TOTAL_DATE} -business ${business}
......
......@@ -52,7 +52,6 @@ spark-submit --class mobvista.dmp.datasource.age_gender.MergeInstallAge \
--conf spark.sql.files.maxPartitionBytes=536870912 \
--conf spark.sql.adaptive.enabled=true \
--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=536870912 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 5 --num-executors 60 \
../${JAR} -date ${LOG_TIME} \
-ga_age_path ${GA_AGE_PATH} -dsp_age_path ${DSP_AGE_PATH} -age_output ${OUTPUT_PATH} -parallelism 2000
......
......@@ -27,7 +27,6 @@ spark-submit --class mobvista.dmp.datasource.TO.TODaily \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 20 \
../../${JAR} \
-output ${OUTPUT_PATH} -coalesce 200 -dt_dash_today ${dt_dash_today}
......
......@@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.baichuan.BaiChuanJob \
--conf spark.default.parallelism=100 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors 20 \
../${JAR} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table}
......
......@@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.baichuan.AliDaily \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 3 --num-executors 5 \
../${JAR} -date ${LOG_TIME} -partNum 10 -output ${OUTPUT_PATH} -host ${host} -cluster ${cluster} -database ${database} -table ${table}
......
......@@ -36,8 +36,6 @@ hadoop fs -rmr ${store_output_path}
spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTag \
--conf spark.sql.shuffle.partitions=100 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 6g --executor-cores 2 --num-executors 5 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 40
......
......@@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.baichuan.AliTagDaily \
--conf spark.sql.shuffle.partitions=10 \
--conf spark.default.parallelism=10 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 5 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 5
......
......@@ -31,8 +31,6 @@ spark-submit --class mobvista.dmp.datasource.newtag.MatchInterestTagDailyV2 \
--conf spark.default.parallelism=8 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 2 \
../${JAR} \
-date ${date} -manualOutput ${output_path} -business ${business} -storeOutput ${store_output_path} -coalesce 8
......
......@@ -39,8 +39,6 @@ spark-submit --class mobvista.dmp.datasource.dm.DmInstallListOrc \
--conf spark.speculation=true \
--conf spark.speculation.quantile=0.8 \
--conf spark.speculation.multiplier=1.2 \
--files ${HIVE_SITE_PATH} \
--jars /data/hadoop-alternative/hive/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 3 --num-executors 80 \
../${JAR} \
-output ${OUTPUT_PATH} -input ${INPUT_PATH} -business ${business}
......
......@@ -34,8 +34,6 @@ spark-submit --class mobvista.dmp.common.InstallListLogic \
--conf spark.default.parallelism=400 \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars ${SPARK_HOME}/auxlib/Common-SerDe-1.0-SNAPSHOT.jar \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 50 \
../${JAR} -date ${LOG_TIME} -business ${business} -output ${OUTPUT} -coalesce 100
......
......@@ -34,7 +34,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliIosActivitionDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 60 \
../../${JAR} -output ${OUTPUT_PATH} -outputdaily ${ALI_OUTPUT_DAILY_PATH} -coalesce 500 \
-yesterday ${yesterday} -today ${dt_today} -dt_dash_today ${dt_dash_today} -dt_dash_rec14day ${dt_dash_rec14day} \
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliOaidActivitionDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 60 \
../../${JAR} -output ${OUTPUT_PATH} -outputdaily ${ALI_OAID_OUTPUT_DAILY_PATH} -coalesce 500 \
-yesterday ${yesterday} -today ${dt_today} -dt_dash_today ${dt_dash_today} -dt_dash_rec14day ${dt_dash_rec14day} \
......
......@@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 4 --num-executors 60 \
../../${JAR} -output ${OUTPUT_PATH} -outputdaily ${ALI_OUTPUT_DAILY_PATH} -coalesce 500 \
-yesterday ${yesterday} -today ${dt_today} -dt_dash_today ${dt_dash_today} -dt_dash_rec14day ${dt_dash_rec14day} \
......
......@@ -84,7 +84,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionPostBackDail
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 3 --num-executors 60 \
../${JAR} -output ${OUTPUT_PATH} -iosoutput ${ALI_IOS_OUTPUT} -oaidoutput ${ALI_OAID_OUTPUT} -coalesce 50 \
-today ${dt_today} -update_date ${dt_dash_today} \
......
......@@ -66,7 +66,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlAliActivitionPostBackDail
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--conf spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive=true \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 70 \
../../${JAR} -output ${OUTPUT_PATH} -iosoutput ${ALI_IOS_OUTPUT} -oaidoutput ${ALI_OAID_OUTPUT} -coalesce 300 \
-today ${dt_today} -update_date ${dt_dash_today} -dt_taobao_postback_day ${dt_taobao_postback_day}\
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlH18FromDmInstallListV2 \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 60 \
../../${JAR} \
-h18_imei ${IMEI_H_18_GUANGDIANTONG_RES_PATH} -h18_imeimd5 ${IMEIMD5_H_18_GUANGDIANTONG_RES_PATH} \
......
......@@ -43,7 +43,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlOtherDataFromPostBackDail
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 150 \
../../${JAR} \
-output ${OUTPUT_PATH} \
......
......@@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--conf hive.exec.orc.default.stripe.size=268435456 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \
../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour}
......
......@@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--conf hive.exec.orc.default.stripe.size=268435456 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \
../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour}
......
......@@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--conf hive.exec.orc.default.stripe.size=268435456 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \
../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour}
......
......@@ -25,7 +25,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayImeiLaHuoCK \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--conf hive.exec.orc.default.stripe.size=268435456 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \
../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table} -hour ${hour}
......
......@@ -37,7 +37,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 120 ../../${JAR} \
-imeioutput "${ALIPAY_IMEIMD5_OUTPUT_PATH}" \
-today ${dt_today} -last_req_day ${last_req_day} -dt_after_one_day ${dt_after_one_day}\
......
......@@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \
-output01 ${OUTPUT01} -output02 ${OUTPUT02}
......@@ -63,7 +62,6 @@ fi
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --files ${HIVE_SITE_PATH} \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
# ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \
# -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT}
......
......@@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \
-output01 ${OUTPUT01} -output02 ${OUTPUT02}
......@@ -63,7 +62,6 @@ fi
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --files ${HIVE_SITE_PATH} \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
# ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \
# -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT}
......
......@@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \
-output01 ${OUTPUT01} -output02 ${OUTPUT02}
......@@ -63,7 +62,6 @@ fi
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --files ${HIVE_SITE_PATH} \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
# ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \
# -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT}
......
......@@ -33,7 +33,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayTmpDataToDmp \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
../../${JAR} -imeiRequestInput ${IMEIMD5_REQUEST_INPUT_PATH} -imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} \
-output01 ${OUTPUT01} -output02 ${OUTPUT02}
......@@ -63,7 +62,6 @@ fi
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --files ${HIVE_SITE_PATH} \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
# ../../${JAR} -dt_today ${dt_today} -dt_three_days_ago ${dt_three_days_ago} \
# -ActivationOutput ${ACTIVATIONOUTPUT} -AcquisitionOutput ${ACQUISITIONOUTPUT}
......
......@@ -20,8 +20,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \
../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour}
......
......@@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \
../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour}
......
......@@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \
../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour}
......
......@@ -19,8 +19,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayLaHuoDF \
--conf spark.kryoserializer.buffer.max=256m \
--conf spark.driver.extraJavaOptions="-XX:+UseG1GC" \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--files ${HIVE_SITE_PATH} \
--jars s3://mob-emr-test/wangjf/jar/spark-clickhouse-connector_2.11-2.4.0_0.22.jar \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 10 \
../../${JAR} -date ${LOG_TIME} -dt_end_days ${dt_end_days} -partNum 60 -imeiOutput ${IMEIMD5_OUTPUT_PATH} -cluster 'cluster_1st' -hour ${hour}
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \
../../${JAR} \
-output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \
../../${JAR} \
-output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \
../../${JAR} \
-output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.AlipayOtherDataToDmp \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 6 --num-executors 140 \
../../${JAR} \
-output01 ${OUTPUT_PATH01} -output02 ${OUTPUT_PATH02} \
......
......@@ -45,7 +45,6 @@ spark-submit --class mobvista.dmp.datasource.btop.BtopDaily \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 2 --num-executors 60 \
../../${JAR} \
-output ${OUTPUT_PATH} -coalesce 200 -dt_today ${dt_today} -dt_dash_tow_days ${dt_dash_tow_days} -seven_days_ago ${seven_days_ago} -fifteen_days_ago ${fifteen_days_ago} \
......
......@@ -22,7 +22,6 @@ spark-submit \
--class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \
--conf spark.network.timeout=720s \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn \
--deploy-mode cluster \
--name cainixihuan_AppInfoCrawlerSpark \
......
......@@ -25,7 +25,6 @@ spark-submit \
--class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \
--conf spark.network.timeout=720s \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn \
--deploy-mode cluster \
--name cainixihuan_AppInfoCrawlerSpark \
......
......@@ -24,7 +24,6 @@ spark-submit \
--class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \
--conf spark.network.timeout=720s \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn \
--deploy-mode cluster \
--name cainixihuan_AppInfoCrawlerSpark \
......
......@@ -24,7 +24,6 @@ spark-submit \
--class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \
--conf spark.network.timeout=720s \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn \
--deploy-mode cluster \
--name cainixihuan_AppInfoCrawlerSpark \
......
......@@ -24,7 +24,6 @@ spark-submit \
--class mobvista.dmp.datasource.apptag.CaiNiXiHuanCrawlerSpark \
--conf spark.network.timeout=720s \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn \
--deploy-mode cluster \
--name cainixihuan_AppInfoCrawlerSpark \
......
......@@ -36,7 +36,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlDealidDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 10g --driver-memory 4g --executor-cores 6 --num-executors 30 \
../${JAR} -dt_dash_today ${dt_dash_today} \
-oppooutput ${OPPO_OUTPUT} \
......
......@@ -62,7 +62,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlLazadaActivitionDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 90 ../${JAR} \
-gaidoutput "${GAID_OUTPUT_PATH}" \
-today ${dt_today} -last_req_day ${last_req_day} \
......@@ -107,7 +106,6 @@ fi
# --conf spark.sql.broadcastTimeout=1200 \
# --conf spark.yarn.executor.memoryOverhead=4096 \
# --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
# --files ${HIVE_SITE_PATH} \
# --master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 70 ../${JAR} \
# -gaidoutput "${GAID_OUTPUT_PATH}" -gaidinput "${GAID_INPUT_PATH}" -newoutput "${NEW_OUTPUT_PATH}" \
# -today ${dt_today} -dt_30days_ago ${dt_30days_ago}
......
......@@ -32,7 +32,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlLazadaIosActivitionDaily
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 90 ../${JAR} \
-today ${dt_today} -last_req_day ${last_req_day} \
-vn_idfaoutput "${VN_IDFA_OUTPUT_PATH}" -id_idfaoutput "${ID_IDFA_OUTPUT_PATH}" -th_idfaoutput "${TH_IDFA_OUTPUT_PATH}" \
......
......@@ -17,7 +17,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.EtlComTencentNewsDaily \
--conf spark.sql.shuffle.partitions=3000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 6g --executor-cores 6 --num-executors 120 \
../../${JAR} \
-output ${OUTPUT_PATH} -coalesce 500 \
......
......@@ -29,7 +29,6 @@ spark-submit --class mobvista.dmp.datasource.reyun.ReyunDaily \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 60 \
../../${JAR} \
-output ${OUTPUT_PATH} -coalesce 400 -dt_today ${dt_today}
......
......@@ -24,8 +24,6 @@ spark-submit --class mobvista.dmp.datasource.reyun.ReyunLabelBaijiu \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--jars ${JARS} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 170 \
../../${JAR} \
-output ${OUTPUT_PATH} -coalesce 680 -dt_today ${dt_today}
......
......@@ -26,7 +26,6 @@ spark-submit --class mobvista.dmp.datasource.reyun.ReyunLabelTest \
--conf spark.sql.shuffle.partitions=2000 \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 40 \
../../${JAR} \
-output ${OUTPUT_PATH} -coalesce 40 -dt_today ${dt_today}
......
......@@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCImeiLaHuoCK \
--conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
--conf hive.exec.orc.default.stripe.size=268435456 \
--conf spark.sql.files.maxPartitionBytes=268435456 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 4g --driver-memory 4g --executor-cores 2 --num-executors 10 \
../../${JAR} -imeiInput ${IMEIMD5_INPUT} -date ${LOG_TIME} -host ${host} -cluster ${cluster} -database ${database} -table ${table}
......
......@@ -24,7 +24,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCLaHuoDaily \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 80 ../../${JAR} \
-imeioutput "${UC_IMEIMD5_OUTPUT_PATH}" -oaidoutput "${UC_OAIDMD5_OUTPUT_PATH}" \
-today ${dt_today} -last_req_day ${last_req_day}
......
......@@ -40,7 +40,6 @@ spark-submit --class mobvista.dmp.datasource.taobao.UCTmpDataToDMP \
--conf spark.sql.broadcastTimeout=1200 \
--conf spark.yarn.executor.memoryOverhead=4096 \
--conf spark.sql.autoBroadcastJoinThreshold=31457280 \
--files ${HIVE_SITE_PATH} \
--master yarn --deploy-mode cluster --executor-memory 8g --driver-memory 4g --executor-cores 4 --num-executors 40 \
../../${JAR} -imeiRequestInput ${UC_IMEIMD5_REQUEST_INPUT_PATH} -oaidRequestInput ${UC_OAIDMD5_REQUEST_INPUT_PATH} \
-imeiResponseInput ${IMEIMD5_RESPONSE_INPUT_PATH} -oaidResponseInput ${OAIDMD5_RESPONSE_INPUT_PATH} \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment