#!/usr/local/bin/python2
# -*- coding: utf-8 -*-
"""Upload zipped DMP audience files to the Toutiao Ads open API.

Usage:
    <script> advertiser_id data_source_name app_id secret path

``path`` must contain a ``refresh_token`` file (first line: the current
OAuth refresh token) and the ``*.zip`` files to upload.  The files are
spread over a fixed number of worker threads; each worker validates and
uploads its files and then registers the uploaded paths with a single
shared data source (created by the first worker to finish, updated by
the others).  A summary / alert mail is sent through ``sendMail.py``.
"""
import glob
import json
import os
import sys
import threading
import time

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")

OPEN_API_URL_PREFIX = "https://ad.toutiao.com/open_api/"
# Synthetic response code returned when an upload request itself fails.
UPLOAD_FAILED_CODE = 40001
# How many times one file is sent before the worker gives up on it.
MAX_UPLOAD_ATTEMPTS = 5
# Number of worker threads / file shards.
SHARD_COUNT = 5

total_valid_num = 0
total_invalid_num = 0
all_num = 0
g_data_source_id = ''
# Guards the four module-level variables above, which every worker updates.
_state_lock = threading.Lock()


def _call_api(method, uri, payload, access_token=None):
    """Send ``payload`` as a JSON body to ``uri`` and return the decoded JSON.

    ``method`` is ``"post"`` or ``"get"``.  The ``Access-Token`` header is
    only sent when ``access_token`` is given.  ``requests`` is imported
    lazily so the module can be imported without it.
    """
    import requests
    send = requests.post if method == "post" else requests.get
    kwargs = {"json": payload}
    if access_token is not None:
        kwargs["headers"] = {"Access-Token": access_token}
    return send(OPEN_API_URL_PREFIX + uri, **kwargs).json()


def get_access_token(appid, secret, auth_code):
    """Exchange an authorization code for tokens; return the JSON response."""
    data = {
        "appid": appid,
        "secret": secret,
        "grant_type": "auth_code",
        "auth_code": auth_code
    }
    return _call_api("post", "oauth2/access_token/", data)


def refresh_access_token(app_id, secret, path):
    """Refresh the access token using the token stored in ``path``.

    Reads the first line of ``<path>/refresh_token``, posts it to the
    refresh endpoint and overwrites the file with the refresh token from
    the response.  Returns the decoded JSON response.

    Raises ``KeyError``/``TypeError`` when the response carries no
    ``data.refresh_token``; the token file is left untouched in that case
    because the lookup happens before the file is reopened for writing.
    """
    token_file = "%s/refresh_token" % path
    with open(token_file, 'r') as fr:
        refresh_token = fr.readline().strip()
    data = {
        "appid": app_id,
        "secret": secret,
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
    }
    rsp_data = _call_api("post", "oauth2/refresh_token/", data)
    new_refresh_token = rsp_data['data']['refresh_token']
    with open(token_file, 'w') as fw:
        fw.write("%s\n" % new_refresh_token)
    return rsp_data


def create_dmp_data_source(advertiser_id, data_source_name, description, file_paths, access_token):
    """Create a data source from uploaded ``file_paths``; return the JSON response."""
    params = {
        "advertiser_id": advertiser_id,
        "data_source_name": data_source_name,
        "description": description,
        "data_format": 0,
        "file_storage_type": 0,
        "file_paths": file_paths
    }
    return _call_api("post", "2/dmp/data_source/create/", params, access_token)


def upload_dmp_data_file(advertiser_id, file, access_token):
    """Upload one local file as a multipart form; return the JSON response.

    Returns ``{"code": 40001}`` when the HTTP status is not 200, when the
    request fails at the transport level, or when the body is not JSON, so
    callers can treat every failure as a non-zero ``code`` and retry.
    """
    import requests
    url = OPEN_API_URL_PREFIX + "2/dmp/data_source/file/upload/"
    headers = {"Access-Token": access_token}
    failure = {"code": UPLOAD_FAILED_CODE}
    try:
        # The handle only has to stay open for the duration of the request.
        with open(file, "rb") as handle:
            files = {
                "advertiser_id": advertiser_id,
                "file": handle,
            }
            rsp = requests.post(url, files=files, headers=headers)
        if rsp.status_code != 200:
            return failure
        return rsp.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        print("upload request for %s failed: %s" % (file, exc))
        return failure


def update_dmp_data_source(advertiser_id, data_source_id, operation_type, file_paths, access_token):
    """Apply ``operation_type`` with ``file_paths`` to a data source; return the JSON response."""
    params = {
        "advertiser_id": advertiser_id,
        "data_source_id": data_source_id,
        "operation_type": operation_type,
        "data_format": 0,
        "file_storage_type": 0,
        "file_paths": file_paths
    }
    return _call_api("post", "2/dmp/data_source/update/", params, access_token)


def delete_ta(advertiser_id, access_token, data_source_id_list):
    """Query the given data sources; return the JSON response.

    NOTE(review): despite the name, this issues a GET against
    ``2/dmp/data_source/read/`` -- confirm whether a delete was intended.
    """
    params = {
        "advertiser_id": advertiser_id,
        "data_source_id_list": data_source_id_list
    }
    return _call_api("get", "2/dmp/data_source/read/", params, access_token)


def select_ta(advertiser_id, access_token):
    """Fetch the first 20 custom audiences of the advertiser; return the JSON response."""
    params = {
        "advertiser_id": advertiser_id,
        "offset": 0,
        "limit": 20
    }
    return _call_api("get", "2/dmp/custom_audience/select/", params, access_token)


class customThread(threading.Thread):
    """Worker thread that uploads one shard of files via ``upload_file``."""

    def __init__(self, threadID, threadName, advertiser_id, data_source_name, access_token, path, files):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.threadName = threadName
        self.advertiser_id = advertiser_id
        self.data_source_name = data_source_name
        # The data source name doubles as its description.
        self.description = data_source_name
        self.path = path
        self.access_token = access_token
        self.files = files

    def run(self):
        """Thread body, executed by ``start()``: upload this worker's files."""
        print("Starting " + self.name + "\n")
        # Pause 2 seconds after every successful upload.
        upload_file(self.threadName, 2, self.advertiser_id, self.data_source_name, self.path, self.files,
                    self.access_token)
        print("Exiting " + self.name + "\n")

    def __del__(self):
        # Format explicitly: print(a, b) prints a tuple repr on Python 2.
        print("%s 线程结束!" % self.name)


def _register_data_source(threadName, advertiser_id, data_source_name, file_paths, access_token,
                          valid_nums, invalid_nums, total_nums):
    """Create the shared data source, or append ``file_paths`` to it.

    Must be called with ``_state_lock`` held so that only one worker ever
    creates the data source.  Returns ``None`` on success, otherwise the
    alert text describing the failure.
    """
    global g_data_source_id
    if g_data_source_id == '':
        data_source_json = create_dmp_data_source(advertiser_id, data_source_name, data_source_name,
                                                  file_paths, access_token)
        if data_source_json["code"] == 0:
            g_data_source_id = data_source_json["data"]["data_source_id"]
            print("数据源创建成功,threadName: %s,data_source_id: %s,valid_nums: %s,invalid_nums: %s,total_nums: %s" % (
                threadName, g_data_source_id, valid_nums, invalid_nums, total_nums) + "\n")
            return None
        print("数据源创建失败,threadName: %s,valid_nums: %s,invalid_nums: %s,total_nums: %s" % (
            threadName, valid_nums, invalid_nums, total_nums) + "\n")
        print("file_paths: %s" % file_paths + "\n")
        return '数据源创建失败!'

    update_source_json = update_dmp_data_source(advertiser_id, g_data_source_id, 1, file_paths, access_token)
    if update_source_json["code"] == 0:
        print("数据源更新成功,threadName: %s,data_source_id: %s,valid_nums: %s,invalid_nums: %s,total_nums: %s" % (
            threadName, g_data_source_id, valid_nums, invalid_nums, total_nums))
        return None
    print("数据源更新失败,threadName: %s,data_source_id: %s,valid_nums: %s,invalid_nums: %s,total_nums: %s" % (
        threadName, g_data_source_id, valid_nums, invalid_nums, total_nums))
    print("file_paths: %s" % file_paths + "\n")
    return '数据源更新失败!data_source_id: %s' % g_data_source_id


def upload_file(threadName, delay, advertiser_id, data_source_name, path, strs, access_token):
    """Validate and upload every file in ``strs``, then register them.

    Each non-empty entry of ``strs`` is validated with
    ``validate.validate`` and uploaded, with up to ``MAX_UPLOAD_ATTEMPTS``
    tries per file and a ``delay``-second pause after each success.  The
    validation counts are added to the module-level totals.  If every file
    was uploaded, the returned remote paths are attached to the shared data
    source; otherwise an alert mail is sent.  Does nothing when there is
    no file to upload.  ``path`` is accepted for interface compatibility
    and not used.
    """
    global total_valid_num
    global total_invalid_num
    global all_num

    pending = [name for name in strs if name != '']
    if not pending:
        # Nothing to do: never create/update a data source with no files.
        return

    import validate

    valid_nums = 0
    invalid_nums = 0
    total_nums = 0
    file_paths = []
    for file_name in pending:
        valid_num, invalid_num, total_num = validate.validate(file_name)
        valid_nums += valid_num
        invalid_nums += invalid_num
        total_nums += total_num
        for attempt in range(1, MAX_UPLOAD_ATTEMPTS + 1):
            upload_json = upload_dmp_data_file(advertiser_id, file_name, access_token)
            if upload_json["code"] == 0:
                file_paths.append(upload_json["data"]["file_path"])
                print("%s 上传文件 %s 成功!" % (threadName, file_name))
                print("threadName: %s,file_paths: %s \n" % (threadName, file_paths))
                time.sleep(delay)
                break
            print("%s 上传文件 %s 失败!" % (threadName, file_name))
            print("threadName: %s,file_paths: %s \n" % (threadName, file_paths))
            print("%s 尝试第 %d 次上传文件 %s" % (threadName, attempt, file_name))

    with _state_lock:
        total_valid_num += valid_nums
        total_invalid_num += invalid_nums
        all_num += total_nums

    if len(file_paths) != len(pending):
        alter('文件上传存在问题,请检查!')
        return

    # Serialise the create-or-update decision across workers.
    with _state_lock:
        failure = _register_data_source(threadName, advertiser_id, data_source_name, file_paths,
                                        access_token, valid_nums, invalid_nums, total_nums)
    if failure is not None:
        # Mail is sent outside the lock so other workers are not blocked.
        alter(failure)


def _shard_index(file_path, shard_count):
    """Return the shard for ``file_path``.

    The three characters preceding the 4-character extension are read as a
    decimal number (leading zeros allowed) and reduced modulo
    ``shard_count``.  Raises ``ValueError`` when they are not digits.
    """
    return int(file_path[-7:-4]) % shard_count


def _partition_files(files, shard_count):
    """Distribute ``files`` over ``shard_count`` lists by numeric suffix.

    Empty entries are skipped; relative order inside a shard is kept.
    """
    shards = [[] for _ in range(shard_count)]
    for file_path in files:
        if file_path == '':
            continue
        print("files : %s,id : %s" % (file_path, file_path[-7:-4]))
        shards[_shard_index(file_path, shard_count)].append(file_path)
    return shards


def main(advertiser_id, data_source_name, app_id, secret, path):
    """Refresh the token, upload all ``*.zip`` files in ``path``, send a summary mail."""
    access_token = ''
    # NOTE(review): retries without bound or delay while the API answers
    # with a non-zero code.
    while access_token == '':
        token_json = refresh_access_token(app_id, secret, path)
        if token_json["code"] == 0:
            access_token = token_json['data']['access_token']

    # glob instead of shelling out to ``ls``: no quoting problems in ``path``.
    zip_files = sorted(glob.glob(path + "/*.zip"))
    shards = _partition_files(zip_files, SHARD_COUNT)

    # Start one worker per shard, then wait for all of them.
    threads = []
    for shard_id, shard in enumerate(shards):
        thread = customThread(shard_id, "Thread-%d" % shard_id, advertiser_id, data_source_name, access_token,
                              path, shard)
        thread.start()
        threads.append(thread)
    for t in threads:
        t.join()

    print("主进程结束!")
    print('total_valid_num: %s' % total_valid_num)
    print('total_invalid_num: %s' % total_invalid_num)
    print('all_num: %s' % all_num)
    os.system('python2 ../python/sendMail.py -t jinfeng.wang@mobvista.com -c \'jinfeng.wang@mobvista.com\' '
              '-s "今日头条上传数据" -b "Hi,<br/> All,本次上传数据已完成。<br/>'
              ' 校验成功:%s条,校验失败:%s条。数据源ID:%s<br/>"' % (
                  total_valid_num, total_invalid_num, g_data_source_id))


def alter(body):
    """Send an alert mail whose text ends with ``body``.

    ``body`` is interpolated into a shell command line inside double
    quotes, so it must not contain shell metacharacters.
    """
    os.system('python2 ../python/sendMail.py -t jinfeng.wang@mobvista.com -c \'jinfeng.wang@mobvista.com\' '
              '-s "今日头条上传数据" -b "Hi,<br/> 本次上传数据异常。原因:%s"' % body)


if __name__ == '__main__':
    # argv: advertiser_id, data_source_name, app_id, secret, path
    main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])