Commit 23dfd046 by AnHui

Initial commit

parents
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3.6)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ocean_engine.iml" filepath="$PROJECT_DIR$/.idea/ocean_engine.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (py3.6)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
# -*- coding: utf-8 -*-
# @ModuleName: __init__.py
# @Function:
# @Author: 安辉
# @Time: 2022/1/17 5:55 下午
# -*- coding: utf-8 -*-
# @ModuleName: ocean_engine_creative
# @Function:
# @Author: 安辉
# @Time: 2022/1/17 5:55 下午
import sys
sys.path.append('../')
import json
import traceback
from datetime import datetime
from utils.common import PERIOD_TYPE, LIST_TYPE, AGGR_CATEGORY_LIST, LANDING_TYPE, AGGR_APP_CODE, VIDEO_TYPE, \
VIDEO_DURATION_TYPE, IMAGE_MODE, AGGR_BUSINESS_CODE, ORDER_BY
from utils.proxy_utils import fetch_proxy_by_service
from utils.redis_utils import RedisUtils
from utils.mongo_utils import MongoUtils
from utils.logs import OceanCreativeLogger
import requests
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from email.mime.multipart import MIMEMultipart
import pandas as pd
import redis
from itertools import combinations, product
from tqdm import tqdm
from multiprocessing import Pool
class OceanCreative(object):
    """Crawler for the Ocean Engine "creative radar" material-list endpoint.

    For one combination of query parameters it requests a page of materials,
    flattens every material into a small tagged record and stores the records
    in a per-day MongoDB collection.
    """

    def __init__(self):
        """Set up logging, storage connections and the fixed request settings."""
        self.log = OceanCreativeLogger
        self.db_name = 'ocean_creative'
        # Redis logical database index used for this crawler's connection.
        self.db = 9
        self.redis_conn = RedisUtils(db=self.db)
        # One MongoDB collection per calendar day: "<db_name>_<YYYY-MM-DD>".
        self.mg_collection = f'{self.db_name}_{datetime.now().strftime("%Y-%m-%d")}'
        self.mongo_conn = MongoUtils(db=self.db_name, collection=self.mg_collection)
        # Only referenced by the disabled de-duplication code in parse().
        self.video_ids = []
        # Today's date with Chinese unit suffixes; used in the attachment name.
        self.date = datetime.now().strftime("%Y{y}%m{m}%d{d}").format(y="年", m="月", d="日")
        self.url = 'https://cc.oceanengine.com/creative_radar_api/v1/material/list'
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
            'referer': 'https://cc.oceanengine.com/inspiration/creative-radar/video',
            'accept-language': 'zh-CN,zh;q=0.9',
        }

    def send_email(self):
        """Mail ``./test.txt`` as a dated ``.csv`` attachment via smtp.qq.com.

        The file is read from the current working directory before the SMTP
        connection is opened, so a missing file never leaves a session open.

        Raises:
            OSError: if ``./test.txt`` cannot be read.
            smtplib.SMTPException: if connecting, logging in or sending fails.
        """
        smtp_server = "smtp.qq.com"
        # NOTE(review): credentials are hard-coded in source control; rotate
        # them and load them from configuration instead.
        user = "984007548@qq.com"
        password = "trpmnetjnvrhbedj"
        sender = "984007548@qq.com"
        # Must be a list: ','.join() over a plain string would join its
        # individual characters and corrupt the "To" header.
        receive = ["anhui@reyun.com"]
        subject = "巨量创意素材标签"
        content = "<html><h1 style='color:red'>巨量创意素材标签</h1></html>"

        with open(r"./test.txt", "rb") as attachment_file:
            send_file = attachment_file.read()
        # NOTE(review): the payload comes from test.txt but is named *.csv —
        # confirm this is the intended export file.
        att = MIMEText(send_file, "base64", 'utf-8')
        att['Content-Type'] = 'application/octet-stream'
        att['Content-Disposition'] = f'attachment;filename="{self.date}巨量创意素材标签.csv"'

        msgRoot = MIMEMultipart()
        msgRoot.attach(MIMEText(content, 'html', 'utf-8'))
        msgRoot['Subject'] = subject
        msgRoot['From'] = sender
        msgRoot['To'] = ','.join(receive)
        msgRoot.attach(att)

        self.log.info("Send email start...")
        # Port 465 is SMTP over implicit SSL. login() performs the EHLO/HELO
        # handshake itself, and the context manager sends QUIT even on error.
        with smtplib.SMTP_SSL(smtp_server, 465) as smtp:
            smtp.login(user, password)
            smtp.sendmail(sender, receive, msgRoot.as_string())
        self.log.info("email send end!")

    def test_params(self, list_type, aggr_category_list):
        """Return a fixed debugging query for one list type and category list.

        Args:
            list_type: value sent as ``list_type``.
            aggr_category_list: value sent as ``aggr_category_list``.

        Returns:
            A tuple of ``(name, value)`` pairs.
        """
        params = (
            ('list_type', list_type),
            ('material_type', '3'),
            ('order_by', 'total_play'),
            ('period_type', '3'),
            ('aggr_app_code', '4'),
            ('aggr_category_list', aggr_category_list),
            ('video_type', '[]'),
            ('landing_type', '[]'),
            ('limit', '24'),
            ('page', '1'),
            ('video_duration_type', '5'),
        )
        return params

    def get_params(self, parameter, page):
        """Build the request query and the metadata that parse() needs later.

        Args:
            parameter: indexable of five values, in order: list type, app
                code, category id list, video type, business code.
            page: page number to request.

        Returns:
            ``(params, meta)``: ``params`` is a tuple of ``(name, value)``
            pairs; ``meta`` carries ``ad_type_id``, ``media_id`` and
            ``video_type_id`` so parsed records can be labelled.
        """
        list_type, app_code, category_ids, video_type, business_code = (
            parameter[0], parameter[1], parameter[2], parameter[3], parameter[4]
        )
        params = (
            ('list_type', list_type),
            ('material_type', '3'),
            ('order_by', 'total_play'),
            ('period_type', '3'),
            ('aggr_app_code', app_code),
            ('aggr_category_list', category_ids),
            # Sent as a one-element list, unlike the '[]' string in test_params.
            ('video_type', [video_type]),
            ('landing_type', '[]'),
            # ('image_mode', parameter.get('image_mode')),
            ('video_duration_type', '5'),
            ('aggr_business_code', business_code),
            ('limit', '24'),
            ('page', page),
        )
        meta = {
            'ad_type_id': list_type,
            'media_id': app_code,
            'video_type_id': video_type,
        }
        return params, meta

    def parse(self, data):
        """Flatten the materials in one API response and store them in MongoDB.

        Args:
            data: decoded JSON response, with the request metadata from
                get_params() stored under ``'meta'``.

        Returns:
            The list of records that were inserted; an empty list when the
            response holds no materials (including a null ``data`` payload).
        """
        self.log.info('正在解析数据...')
        # "or" guards against keys that are present but null in the payload.
        ad_data = data.get('data') or {}
        materials = ad_data.get('materials') or []
        if not materials:
            self.log.info(f'data:{data}')
            self.log.info('当前参数组合暂无数据...')
            return []
        meta = data.get('meta') or {}
        # These labels depend only on the request, not on the single material.
        ad_type = LIST_TYPE.get(meta.get('ad_type_id', ''), '')
        material_type = VIDEO_TYPE.get(meta.get('video_type_id'), [])
        media = AGGR_APP_CODE.get(meta.get('media_id', ''), '')
        results = []
        for item in materials:
            first_industry = item.get('first_game_ad_industry_name', '')
            second_industry = item.get('second_game_ad_industry_name', '')
            video_id = item.get('vid', '')
            result = {
                'material_id': item.get('material_id', ''),
                # Ad copy / title text.
                'creative': item.get('bestTitle', ''),
                'material_type': material_type,
                'ad_type': ad_type,
                'industry': f'{first_industry}-{second_industry}',
                # Promotion target label looked up from the landing type id.
                'promotion_target': LANDING_TYPE.get(item.get('landing_type', ''), ''),
                'media': media,
                'material_url': f'https://api.amemv.com/aweme/v1/play/?video_id={video_id}',
            }
            # De-duplication by video id is currently disabled:
            # if video_id in self.video_ids:
            #     self.log.info('数据重复,已过滤...')
            #     continue
            # if video_id not in self.video_ids:
            #     self.video_ids.append(video_id)
            # self.mongo_conn.insert(result)
            # self.log.info(f'新增{len(self.video_ids)}条数据')
            results.append(result)
        self.mongo_conn.insert(results)
        self.log.info('数据解析完毕...')
        return results

    def get_response(self, params, meta=None):
        """Request one page and hand the response to parse().

        Args:
            params: query parameters for the material-list endpoint.
            meta: request metadata from get_params(); when falsy the decoded
                response is returned instead of being parsed.

        Returns:
            The decoded response dict when ``meta`` is falsy; otherwise
            ``None``. ``None`` is also returned when the request or JSON
            decoding fails (the failure is logged, not raised).
        """
        try:
            proxies = None
            # Proxies are only used off the desktop platforms, so skip the
            # proxy lookup entirely on Windows / macOS.
            if sys.platform not in ['win32', 'darwin']:
                _, _, _, proxies = fetch_proxy_by_service()
            # NOTE(review): verify=False disables TLS certificate checks.
            response = requests.get(url=self.url, headers=self.headers, params=params, proxies=proxies, verify=False,
                                    timeout=5)
            data = response.json()
            data['meta'] = meta
        except Exception:
            # Best effort: one failed combination must not stop the crawl.
            self.log.exception('request failed')
            return None
        if not meta:
            return data
        # Errors raised while parsing are deliberately not swallowed here.
        self.parse(data)
        return None

    def run(self, parameter):
        """Fetch and store the first page for one parameter combination.

        Args:
            parameter: the five-value combination accepted by get_params().
        """
        page = 1
        self.log.info(f'当前参数组合:{parameter}')
        params, meta = self.get_params(parameter, page)
        self.get_response(params, meta)
# Crawler owned by the current process; created lazily by _run_in_worker().
_WORKER_CRAWLER = None


def _run_in_worker(parameter):
    """Run one parameter combination inside a pool worker process.

    Each worker builds its own OceanCreative on first use, so connections
    and log handlers never have to be pickled and sent across processes.
    NOTE(review): presumably the Redis/Mongo clients held by OceanCreative
    are not picklable, which is why a bound method is not submitted.
    """
    global _WORKER_CRAWLER
    if _WORKER_CRAWLER is None:
        _WORKER_CRAWLER = OceanCreative()
    try:
        _WORKER_CRAWLER.run(parameter)
    except Exception:
        # Keep the crawl best-effort: log the failure and continue.
        _WORKER_CRAWLER.log.exception(f'parameter failed: {parameter}')


if __name__ == '__main__':
    # Every combination of list type x app code (4, 8) x industry id list
    # x video type x business code.
    industry_ids = [i.get('child_ids') for i in AGGR_CATEGORY_LIST.values()]
    parameters = list(
        product(LIST_TYPE.keys(), [4, 8], industry_ids, VIDEO_TYPE.keys(), AGGR_BUSINESS_CODE.keys())
    )
    if parameters:
        # At most 10 worker processes, as before; every combination is now
        # submitted exactly once instead of one combination forever.
        pool = Pool(min(10, len(parameters)))
        try:
            pool.map(_run_in_worker, parameters)
        finally:
            pool.close()
            pool.join()
# -*- coding: utf-8 -*-
# @ModuleName: test
# @Function:
# @Author: 安辉
# @Time: 2022/1/19 5:49 下午
from itertools import combinations, product
from multiprocessing import Pool
import os, time
from tqdm import tqdm
import time
# Scratch check of itertools.product: print every (a, b, c) combination.
# After printing, tmp is left bound to the exhausted iterator.
tmp = product((1, 2, 3), [4, 5], (6, 7))
print([combo for combo in tmp])
# for i in tqdm(range(100)):
# # print('测试')
# time.sleep(0.1)
# print('')
# def work(n):
# print('%s run' % os.getpid())
# time.sleep(3)
# return n ** 2
#
#
# if __name__ == '__main__':
# p = Pool(3) # 进程池中从无到有创建三个进程,以后一直是这三个进程在执行任务
# res_l = []
# for i in range(10):
# res = p.apply_async(work, args=(i,)) # submitted asynchronously (non-blocking); the result is collected later with res.get()
# res_l.append(res)
#
# # 异步apply_async用法:如果使用异步提交的任务,主进程需要使用jion,等待进程池内任务都处理完,然后可以用get收集结果,否则,主进程结束,进程池可能还没来得及执行,也就跟着一起结束了
# p.close()
# p.join()
# for res in res_l:
# print(res.get()) # 使用get来获取apply_aync的结果,如果是apply,则没有get方法,因为apply是同步执行,立刻获取结果,也根本无需get
# Hosts to deploy to (bash array, one address per line)
hosts=(
39.104.17.39
)
# Local directory to sync from
local_dir="/Users/anhui/codes/ocean_engine/"
# Remote directory to sync into
target_dir="/data/ocean_engine"
# Login name on the remote server
username="root"
# Remote server IP (unused alternative, kept for reference)
#ip="39.104.170.192"
# Sync the local project tree to one remote host.
# Arguments:
#   $1 - address of the remote host
# Globals read: local_dir, target_dir, username
# Returns: the exit status of rsync (it is the last command executed).
run_rsync(){
    # ssh $username@$1 "sudo supervisorctl stop ry_load:"
    # ssh $username@$1 "sudo supervisorctl stop wechat:"
    # Expansions are quoted so paths or hosts containing spaces or glob
    # characters are passed to rsync as single arguments.
    rsync -avh --rsync-path="sudo rsync" \
        --exclude="__pycache__" \
        --exclude=".git" \
        --exclude=".idea" \
        --exclude=".gitignore" \
        --exclude="update.sh" \
        "$local_dir" \
        "$username@$1:$target_dir"
    # ssh $username@$1 "sudo supervisorctl restart ry_load:"
    # ssh $username@$1 "sudo supervisorctl start wechat:"
}
# Deploy to every configured host. "${hosts[@]}" expands all array elements
# (plain $hosts only yields the first one), and the result line reflects
# whether rsync actually succeeded.
for host in "${hosts[@]}"; do
    if run_rsync "$host"; then
        echo "$host" "ok"
    else
        echo "$host" "failed" >&2
    fi
done
\ No newline at end of file
# -*- coding: utf-8 -*-
# @ModuleName: __init__.py
# @Function:
# @Author: 安辉
# @Time: 2022/1/17 5:51 下午
# -*- coding: utf-8 -*-
# @ModuleName: common
# @Function:
# @Author: 安辉
# @Time: 2022/1/17 5:52 下午
# Material type id -> display name (3: video, 2: image, 5: landing page).
MATERIAL_TYPES = {
3: "视频",
2: "图片",
5: "落地页"
}
# Ad list type id -> display name. The ids are the values sent as the
# `list_type` query parameter; the names label each stored record's ad type.
LIST_TYPE = {
1: "信息流广告",
3: "搜索广告",
2: "巨量千川",
4: "直播广告",
6: "原生广告"
}
AGGR_CATEGORY_LIST = {
'3C及电器': {
190101: '消费类电子产品',
190102: '电器',
190103: '其他3C及电器',
190104: '手机',
190105: '手机配件',
190106: '电脑',
190107: '电脑配件',
190108: '3C及电器电商',
190109: '3C及电器线下零售',
'child_ids': [190101, 190102, 190103, 190104, 190105, 190106, 190107, 190108, 190109]
},
'快速消费品': {
190201: '日化用品',
190202: '一般化妆品',
190203: '高档化妆品',
190204: '特殊化妆品',
190205: '酒',
190206: '烟',
190207: '成人用品',
190208: '宠物用品',
190209: '母婴用品',
190210: '其他快速消费品',
190211: '化妆品及日化电商',
190212: '化妆品及日化线下零售',
190213: '母婴用品电商',
190214: '母婴用品线下零售',
190215: '烟酒电商',
190216: '烟酒线下零售',
190217: '宠物用品电商',
190218: '宠物用品线下零售',
190219: '美妆工具',
190220: '特殊日化用品',
'child_ids': [190201, 190202, 190203, 190204, 190205, 190206, 190207, 190208, 190209, 190210, 190211, 190212,
190213, 190214, 190215, 190216, 190217, 190218, 190219, 190220]
},
'食品饮料': {
190301: '休闲零食',
190302: '饮料冲调',
190303: '乳制品及乳制品饮料',
190304: '粮油米面',
190305: '生鲜',
190306: '代餐',
190307: '其他食用初级农产品',
190308: '营养品',
190309: '其他食品饮料',
190310: '酒',
190311: '食品饮料(非生鲜)电商',
190312: '食品饮料(非生鲜)线下零售',
190313: '生鲜电商',
190314: '生鲜线下零售',
'child_ids': [190301, 190302, 190303, 190304, 190305, 190306, 190307, 190308, 190309, 190310, 190311, 190312,
190313, 190314]
},
'服装配饰': {
190401: '一般服装鞋帽',
190402: '高档服装鞋帽',
190403: '一般箱包眼镜',
190404: '高档箱包眼镜',
190405: '一般钟表',
190406: '高档钟表',
190407: '一般珠宝饰品',
190408: '高档珠宝饰品',
190409: '其他服装配饰',
190410: '服装配饰电商',
190411: '服装配饰线下零售',
'child_ids': [190401, 190402, 190403, 190404, 190405, 190406, 190407, 190408, 190409, 190410, 190411]
},
'医疗': {
190501: '医疗机构',
190503: '医疗器械',
190504: '药品',
190505: '保健品-国内',
190506: '医疗周边服务',
190507: '其他医疗',
190508: '医疗综合服务平台',
190509: '兽药',
190510: '网上药店/医药电商',
190511: '药店/医药线下零售',
190512: '保健品-跨境',
'child_ids': [190501, 190503, 190504, 190505, 190506, 190507, 190508, 190509, 190510, 190511, 190512]
},
'商务服务': {
190601: '安全安保',
190602: '出版传媒',
190603: '包装印刷',
190604: '中介服务',
190605: '管理咨询',
190606: '广告服务',
190607: '商演会展',
190608: '设计',
190609: '会计税务',
190610: '法律服务',
190611: '人力资源服务',
190612: '其他商务服务',
190613: '代运营服务',
'child_ids': [190601, 190602, 190603, 190604, 190605, 190606, 190607, 190608, 190609, 190610, 190611, 190612,
190613]
},
'生活服务': {
190701: '生活服务综合平台',
190702: '便民服务',
190703: '家政服务',
190704: '护养服务',
190705: '摄影',
190706: '美容美发',
190707: '回收买卖',
190708: '租赁服务',
190710: '婚恋服务',
190711: '配送服务',
190712: '移民服务',
190713: '情感咨询',
190714: '其他生活服务',
190715: '结婚服务',
190716: '婚嫁平台',
'child_ids': [190701, 190702, 190703, 190704, 190705, 190706, 190707, 190708, 190710, 190711, 190712, 190713,
190714, 190715, 190716]
},
'房地产': {
190801: '房地产开发商',
190802: '房地产中介',
190803: '物业管理公司',
190804: '房地产销售代理',
190805: '其他房地产',
190806: '房地产综合服务平台',
'child_ids': [190801, 190802, 190803, 190804, 190805, 190806]
},
'家居建材': {
190901: '家装主材',
190902: '五金电工',
190903: '装修设计',
190904: '家具',
190905: '家居饰品',
190906: '其他家居建材',
190907: '家居百货',
190908: '装修环境治理',
190909: '家装辅材',
190910: '建筑工程',
190911: '家居建材综合服务平台',
190912: '家居建材电商',
190913: '家居建材线下零售',
'child_ids': [190901, 190902, 190903, 190904, 190905, 190906, 190907, 190908, 190909, 190910, 190911, 190912,
190913]
},
'教育培训': {
191001: '幼儿教育',
191002: '中小学教育',
191003: '学历教育',
191004: '语言及留学',
191005: '兴趣培训',
191006: '职业技能',
191007: '企业管理培训',
191008: '特殊人群教育',
191009: '职业资格考证培训',
191010: '其他教育培训',
'child_ids': [191001, 191002, 191003, 191004, 191005, 191006, 191007, 191008, 191009, 191010]
},
'出行旅游': {
191101: '景点',
191102: '酒店住宿',
191103: 'OTA(online travel Agent)',
191104: '旅行社',
191105: '航空公司',
191106: '公路客运公司',
191107: '邮轮',
191108: '商旅票务代理',
191109: '城市出行',
191110: '其他出行旅游',
'child_ids': [191101, 191102, 191103, 191104, 191105, 191106, 191107, 191108, 191109, 191110]
},
'社会公共': {
191201: '政府政务文化旅游',
191202: '社会组织',
191203: '市政建设',
191204: '宗教',
191205: '其他社会公共',
191206: '政府政务科教卫生',
191207: '政府政务环境安全',
'child_ids': [191201, 191202, 191203, 191204, 191205, 191206, 191207]
},
'游戏': {
114: '捕鱼',
113: '棋牌游戏',
107: '三国',
102: '仙侠/武侠/玄幻',
112: '末日生存',
109: '战争',
106: '奇迹',
108: '西游',
104: '魔幻',
111: '二次元',
105: '科幻',
101: '传奇',
116: '体育游戏',
100: '其他题材',
103: '官斗',
117: '游戏盒子',
999: '其他',
115: '女性向',
110: '都市',
'child_ids': [114, 113, 107, 102, 112, 109, 106, 108, 104, 111, 105, 101, 116, 100, 103, 117, 999, 115, 110]
},
'零售': {
191401: '综合类2B电商',
191402: '垂直类2B电商',
191403: '综合类2C电商',
191404: '跨境类2C电商',
191405: '垂直类2C电商',
191406: '综合类线下零售',
191407: '跨境类线下零售',
191408: '其他零售',
'child_ids': [191401, 191402, 191403, 191404, 191405, 191406, 191407, 191408]
},
'交通工具': {
191501: '飞机厂商',
191502: '船舶厂商',
191503: '摩托车厂商',
191504: '非机动车厂商',
191505: '功能性车辆厂商',
191506: '其他交通工具',
191507: '交通工具后市场',
'child_ids': [191501, 191502, 191503, 191504, 191505, 191506, 191507]
},
'汽车': {
191601: '汽车厂商',
191602: '汽车经销商',
191603: '二手车线下零售',
191604: '汽车后市场',
191605: '其他汽车',
191606: '汽车综合服务平台',
191607: '新车线下零售',
'child_ids': [191601, 191602, 191603, 191604, 191605, 191606, 191607]
},
'农林牧畜渔': {
191701: '农业',
191702: '林业',
191703: '渔业',
191704: '畜牧业',
191705: '农林服务',
191706: '农产品加工',
191707: '化肥及农药',
191708: '其他农林牧畜渔',
'child_ids': [191701, 191702, 191703, 191704, 191705, 191706, 191707, 191708]
},
'化工及能源': {
191801: '化工制品',
191802: '消毒产品',
191803: '危险化学品',
191804: '食品化工材料',
191805: '化工其它',
191806: '燃料能源',
191807: '电力能源',
191808: '新能源',
191809: '矿产资源',
191810: '污染处理',
191811: '废旧回收',
191812: '节能',
191813: '其他化工及能源',
'child_ids': [191801, 191802, 191803, 191804, 191805, 191806, 191807, 191808, 191809, 191810, 191811, 191812,
191813]
},
'电子电工': {
191901: '电子器件',
191902: '仪器仪表',
191903: '电工电气',
191904: '电工机械',
191905: '其他电子电工',
'child_ids': [191901, 191902, 191903, 191904, 191905]
},
'机械设备': {
192001: '通用机械设备',
192002: '农林机械',
192003: '矿产机械',
192004: '建筑工程机械',
192005: '化工机械',
192006: '木材石材加工机械',
192007: '机床机械',
192008: '商用设备',
192009: '基础机械',
192010: '工具配件',
192011: '食品机械',
192013: '清洁通风设备',
192014: '其他机械设备',
192015: '机械设备线下零售',
'child_ids': [192001, 192002, 192003, 192004, 192005, 192006, 192007, 192008, 192009, 192010, 192011, 192013,
192014, 192015]
},
'文体娱乐': {
192101: '演出票务及周边',
192102: '文化艺术收藏品',
192103: '文具玩具礼品',
192104: '乐器',
192105: '室内娱乐',
192106: '运动健身',
192107: '体育用品',
192108: '户外用品',
192109: '体育赛事及场馆',
192110: '园艺',
192111: '彩票',
192112: '会员卡券',
192113: '户外娱乐',
192114: '其他文体娱乐',
192115: '文体娱乐电商',
192116: '文体娱乐线下零售',
'child_ids': [192101, 192102, 192103, 192104, 192105, 192106, 192107, 192108, 192109, 192110, 192111, 192112,
192113, 192114, 192115, 192116]
},
'传媒及内容': {
192201: '电视台',
192202: '广播台',
192203: '影视音像制作',
192204: '书籍杂志',
192205: '新闻资讯',
192206: '综合资讯',
192207: '垂直资讯',
192208: '网络视听',
192209: '其他传媒及内容',
192210: '书籍杂志-实体书',
'child_ids': [192201, 192202, 192203, 192204, 192205, 192206, 192207, 192208, 192209, 192210]
},
'物流业': {
192301: '快递物流',
192302: '货运代理',
192303: '特殊运输',
192304: '物流基础设施',
192305: '其他物流业',
'child_ids': [192301, 192302, 192303, 192304, 192305]
},
'通信': {
192401: '电信运营商',
192402: '虚拟运营商',
192403: '通信设备',
192404: '其他通信',
'child_ids': [192401, 192402, 192403, 192404]
},
'金融业': {
192501: '银行业',
192502: '证券业',
192503: '保险业',
192504: '信托业',
192505: '担保及保理',
192506: '拍卖典当',
192507: '期货',
192508: '基金',
192509: '征信机构',
192511: '其他金融业',
192512: '银行卡组织',
192601: '综合理财',
192604: '贷款服务',
192605: '外汇',
192606: '贵金属',
192607: 'P2P',
192608: '第三方支付',
192609: '众筹',
192610: '数字货币',
192611: '金融门户网站',
192614: '综合金融平台及工具',
'child_ids': [192501, 192502, 192503, 192504, 192505, 192506, 192507, 192508, 192509, 192511, 192512, 192601,
192604, 192605, 192606, 192607, 192608, 192609, 192610, 192611, 192614]
},
'互联金融业': {
192602: '互联网证券平台',
192603: '港股美股证券公司及证券投资',
192612: '互联网银行',
192613: '互联网保险',
192615: '其他互联网金融',
'child_ids': [192602, 192603, 192612, 192613, 192615]
},
'餐饮服务': {
192701: '餐厅',
192702: '饮品',
192703: '其他餐饮服务',
'child_ids': [192701, 192702, 192703]
},
'工具类软件': {
192801: '软件工具',
192802: '多媒体处理',
192803: '社交通讯',
192804: '实用工具',
192805: '其他工具类软件',
'child_ids': [192801, 192802, 192803, 192804, 192805]
},
'招商加盟': {
192901: '招商加盟联展平台',
192902: '生活用品加盟',
192903: '生活服务加盟',
192904: '服装配饰加盟',
192905: '美容美发加盟',
192906: '室内娱乐加盟',
192907: '教育培训加盟',
192908: '酒店加盟',
192909: '餐食加盟',
192910: '医疗医药加盟',
192911: '房产家居建材加盟',
192912: '食品加盟',
192913: '工业加盟',
192914: '汽车产品加盟',
192915: '互联网软件加盟',
192916: '机械电子加盟',
192917: '招聘中介加盟',
192918: '其他招商加盟',
192919: '减肥加盟',
192920: '饮品烘焙加盟',
192921: '保健品加盟',
192922: '酒加盟',
192923: '农业加盟',
192924: '商务服务加盟',
'child_ids': [192901, 192902, 192903, 192904, 192905, 192906, 192907, 192908, 192909, 192910, 192911, 192912,
192913, 192914, 192915, 192916, 192917, 192918, 192919, 192920, 192921, 192922, 192923, 192924]
}
}
# Landing type id -> promotion target name (999: all targets, 0: other).
LANDING_TYPE = {
999: "全部推广目标",
1: "销售线索收集",
2: "头条文章推广",
3: "应用推广",
4: "商品推广",
5: "商品目录推广",
6: "门店推广",
7: "抖音号推广",
8: "电商店铺推广",
0: "其他"
}
# App code -> media / placement name; ids are sent as `aggr_app_code`.
AGGR_APP_CODE = {
4: "抖音",
8: "头条",
1: "西瓜",
3: "抖音火山版",
9: "穿山甲",
38: "搜索位"
}
# Video type id -> "<category>-<sub-category>" display name.
VIDEO_TYPE = {
30030006: '情景剧-多人情景剧',
30030007: '情景剧-单人情景剧',
30030008: '情景剧-情景演绎',
30030009: '真人口播-单人口播',
30030010: '真人口播-多人口播',
30030011: '商品展示-商品展示(有语音)',
30030012: '商品展示-商品展示(无语音)',
30030013: '图文展示-应用录屏',
30030014: '图文展示-纸上文字',
30030015: '图文展示-图文快闪',
30030016: '动画展示-游戏直播解说',
30030017: '动画展示-游戏录屏',
30030018: '动画展示-CG动画',
30030019: '其他视频类型-其他视频类型'
}
# Statistics period id -> name (yesterday, 3 days, 7 days, 30 days).
PERIOD_TYPE = {
1: "昨日",
3: "3日",
7: "7日",
30: "30日"
}
# Video duration bucket id -> name (5: any duration).
VIDEO_DURATION_TYPE = {
5: "全部时长",
1: "0-15秒",
2: "15-30秒",
3: "30-60秒",
4: "60秒以上"
}
# Image orientation id -> name (5: landscape, 15: portrait).
IMAGE_MODE = {
5: "横版",
15: "竖版"
}
# Business code -> material source name; ids are sent as `aggr_business_code`.
AGGR_BUSINESS_CODE = {
1: "本地上传",
2: "微电影",
3: "创意工具",
4: "巨量创意app",
5: "即合平台",
6: "其他-所有其他",
7: "橙子建站"
}
# Sort key -> display name for the `order_by` query parameter.
ORDER_BY = {
"total_play": "热度",
"click_show_rate": "点击率",
"convert_click_rate": "点击转化率",
"convert_show_rate": "曝光转化率",
"play_over_rate": "播放完成率",
"cost": "消耗金额",
"interactive_cnt": "互动量"
}
# -*- coding: utf-8 -*-
# @ModuleName: logs
# @Function:
# @Author: 安辉
# @Time: 2022/1/19 3:01 下午
import re
import logging
import sys
from logging.handlers import TimedRotatingFileHandler
# Debug mode is on when running on a desktop platform (Windows or macOS)
# and off everywhere else.
PRJ_DEBUG = sys.platform in ["win32", "darwin"]
class Log(object):
    """Thin wrapper around a named ``logging.Logger``.

    Outside debug mode records go to a file that rotates at midnight and
    keeps three backups; in debug mode they go to the console at DEBUG level.

    NOTE(review): because records are emitted from these wrapper methods,
    ``%(module)s`` / ``%(funcName)s`` presumably report this wrapper rather
    than the real caller — confirm whether that is acceptable.
    """

    def __init__(self, name, filename, level=logging.INFO):
        """Create the logger and attach the handler for the current mode.

        Args:
            name: logger name passed to ``logging.getLogger``.
            filename: log file path; only used outside debug mode.
            level: logger level outside debug mode (debug mode uses DEBUG).
        """
        self.logger = logging.getLogger(name)
        self.formatter = logging.Formatter('%(levelname)s|%(asctime)s|%(module)s|%(funcName)s|%(message)s')
        if not PRJ_DEBUG:
            # The handler opens the file immediately, so the directory must
            # exist before it is constructed.
            import os
            log_dir = os.path.dirname(filename)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            # One file per day, keeping three days of backups.
            self.fh = TimedRotatingFileHandler(filename, when='midnight', interval=1, backupCount=3)
            # suffix and extMatch must agree, otherwise old files are never
            # recognised for deletion; the dot before "log" is literal.
            self.fh.suffix = '%Y-%m-%d.log'
            self.fh.extMatch = re.compile(r'^\d{4}-\d{2}-\d{2}\.log$')
            self.fh.setFormatter(self.formatter)
            self.logger.addHandler(self.fh)
            self.logger.setLevel(level)
        else:
            self.ch = logging.StreamHandler()
            self.ch.setFormatter(self.formatter)
            self.logger.addHandler(self.ch)
            self.logger.setLevel(logging.DEBUG)

    def debug(self, msg):
        """Log ``msg`` at DEBUG level."""
        self.logger.debug(msg)

    def warning(self, msg):
        """Log ``msg`` at WARNING level."""
        self.logger.warning(msg)

    def info(self, msg):
        """Log ``msg`` at INFO level."""
        self.logger.info(msg)

    def error(self, msg):
        """Log ``msg`` at ERROR level."""
        self.logger.error(msg)

    def critical(self, msg):
        """Log ``msg`` at CRITICAL level."""
        self.logger.critical(msg)

    def exception(self, msg):
        """Log ``msg`` at ERROR level together with the active traceback."""
        self.logger.exception(msg)

    def close(self):
        """Detach and close whichever handler this instance installed.

        Works in both modes: only one of ``fh`` (file) or ``ch`` (console)
        exists on a given instance. Calling it more than once is harmless.
        """
        for attr in ('fh', 'ch'):
            handler = getattr(self, attr, None)
            if handler is not None:
                self.logger.removeHandler(handler)
                handler.close()
# Shared logger instance for the crawler, created at import time. The file
# path is only used when PRJ_DEBUG is false.
OceanCreativeLogger = Log('ocean_creative', '/data/logs/ocean_engine/ocean_creative.log')
# -*- coding: utf-8 -*-
# @ModuleName: mongo_utils
# @Function:
# @Author: 安辉
# @Time: 2022/1/18 5:30 下午
import sys
import pymongo
# Connection settings are selected by platform: Windows / macOS use the
# unauthenticated development server, everything else uses the authenticated
# server. The client is created once at import time and shared.
# NOTE(review): the credentials below are stored in source control.
if sys.platform not in ['win32', 'darwin']:
    MONGO_CONF = {
        'host': '172.24.24.229',
        'port': 27016,
        'user': 'root',
        'passwd': 'liujiatian@reyun'
    }
    mongo_client = pymongo.MongoClient(
        host=MONGO_CONF['host'],
        port=MONGO_CONF['port'],
        username=MONGO_CONF['user'],
        password=MONGO_CONF['passwd'],
    )
else:
    MONGO_CONF = {
        'host': '39.99.32.199',
        'port': 27016,
    }
    mongo_client = pymongo.MongoClient(
        host=MONGO_CONF['host'],
        port=MONGO_CONF['port'],
    )
class MongoUtils(object):
    """Helper bound to one collection of the shared module-level client."""

    def __init__(self, db=None, collection=None):
        """Select the database and collection by name.

        Args:
            db: database name.
            collection: collection name inside that database.
        """
        self.mongo_db = mongo_client[db]
        self.collection = self.mongo_db[collection]

    def insert(self, data):
        """Insert one document (dict) or several documents (list).

        An empty list is skipped because ``insert_many`` rejects it. Values
        that are neither a list nor a dict are ignored.

        Args:
            data: a document dict or a list of document dicts.
        """
        if isinstance(data, list):
            if data:
                self.collection.insert_many(data)
        elif isinstance(data, dict):
            # Collection.insert() is deprecated and removed in PyMongo 4.
            self.collection.insert_one(data)

    def close_db(self):
        """Close the shared client; this affects every MongoUtils instance."""
        mongo_client.close()
# -*- coding: utf-8 -*-
# @ModuleName: proxy_utils.py
# @Function:
# @Author: 安辉
# @Time: 2021/11/17 15:00
import sys
sys.path.append('../')
import time
import re
import random
from base64 import urlsafe_b64encode
from utils.redis_utils import redis_db3_conn, redis_db0_conn
# City name -> ten consecutive id strings, starting at '2001' for the first
# city and continuing without gaps ('2011' for the second, and so on).
# Only referenced by the disabled per-city lookup in fetch_proxy_by_vps().
CITY_IP_CONFIG = {
    city: [str(2001 + 10 * index + offset) for offset in range(10)]
    for index, city in enumerate(
        ('池州', '黄冈', '扬州', '吉安', '九江', '芜湖', '鹰潭', '丽水', '台州', '绍兴')
    )
}
def fetch_proxy_by_service(source='all'):
    """Pick a random proxy from the third-party proxy services.

    Args:
        source: ``'duobei'`` or ``'abuyun'`` to restrict the choice to one
            provider; any other value chooses from both lists.

    Returns:
        A 4-tuple ``(proxy, real_ip, proxyAuth, proxy_request_format)``:
        the proxy URL without credentials, the value returned by
        ``redis_db0_conn.get_real_ip`` for the full proxy URL, a
        ``'Basic ...'`` authorization value, and a dict mapping ``'http'``
        and ``'https'`` to the proxy URL with credentials.

    Raises:
        ValueError: if the chosen entry is not ``http://user:password@host``.
    """
    # NOTE(review): provider credentials are hard-coded in source control.
    # Abuyun
    abuyun_proxy_list = [
        'http://H60X43R32773Q47P:6740785F761EB814@http-pro.abuyun.com:9010',
        'http://H8EYV13W48Y3BUXP:F5E9A0031C2682C0@http-pro.abuyun.com:9010',
        'http://H52HS5328ICQ017P:6EBF9D1757F23D2E@http-pro.abuyun.com:9010',
        'http://H670P7BTG7436H4P:EE57DA5267877D78@http-pro.abuyun.com:9010',
    ]
    # Dobel
    duobei_proxy_list = [
        'http://RYNETHTT3:gL0I092UHjf@http-proxy-t1.dobel.cn:9180',
        'http://RYNETHTT4:gL0I092UHjf@http-proxy-t1.dobel.cn:9180',
        'http://RYNETHTT5:gL0I092UHjf@http-proxy-t1.dobel.cn:9180',
        'http://RYNETHTT6:gL0I092UHjf@http-proxy-t1.dobel.cn:9180',
        # 'http://RYNETHTT7:gL0I092UHjf@http-proxy-t1.dobel.cn:9180',  reserved for the reverse-engineering job
    ]
    if source == 'duobei':
        proxy_list = duobei_proxy_list
    elif source == 'abuyun':
        proxy_list = abuyun_proxy_list
    else:
        proxy_list = abuyun_proxy_list + duobei_proxy_list
    chosen = random.choice(proxy_list)
    # Raw string: "\w" in a normal literal is an invalid escape sequence.
    match = re.search(r'http://(\w+):(\w+)@(.*)', chosen)
    if match is None:
        raise ValueError(f'malformed proxy entry: {chosen}')
    user, password, host = match.groups()
    proxy = f'http://{host}'
    proxyAuth = 'Basic ' + urlsafe_b64encode(bytes((user + ':' + password), 'ascii')).decode('utf8')
    full_proxy = f'http://{user}:{password}@{host}'
    real_ip = redis_db0_conn.get_real_ip(full_proxy)
    proxy_request_format = {
        'http': full_proxy,
        'https': full_proxy
    }
    return proxy, real_ip, proxyAuth, proxy_request_format
# Cache shared by fetch_proxy_by_vps(): when the IP list was last refreshed
# from Redis, and the IPs fetched at that time.
last_fetch_time = 0
ip_list = []


def fetch_proxy_by_vps(city=''):
    """Pick a random proxy IP from the VPS pool stored in Redis.

    The IP list is re-read from the ``proxies_clean`` hash at most once
    every 10 seconds; between refreshes the cached list is reused.

    Args:
        city: currently unused; the per-city lookup is disabled.

    Returns:
        A 4-tuple ``(proxy, real_ip, proxyAuth, proxy_request_format)``:
        ``http://<ip>:<port>``, the chosen IP, a ``'Basic ...'``
        authorization value, and a dict mapping ``'http://'`` and
        ``'https://'`` to the proxy URL with credentials.

    Raises:
        IndexError: if the pool is empty.
    """
    global last_fetch_time, ip_list
    current_ts = time.time()
    fetch_interval = 10
    redis_key = 'proxies_clean'
    # NOTE(review): proxy credentials are hard-coded in source control.
    user = 'xzszlz'
    password = 'Lsjkcbdjz666'
    port = 18119
    if current_ts - last_fetch_time > fetch_interval:
        ip_list = redis_db3_conn.redis_hvals(redis_key)
        # Only mark the cache as fresh once the fetch has succeeded.
        last_fetch_time = current_ts
    # if city in CITY_IP_CONFIG:
    #     ip_id = random.choice(CITY_IP_CONFIG[city])
    #     real_ip = r.redis_hget(redis_key, ip_id)
    # else:
    real_ip = random.choice(ip_list)
    if isinstance(real_ip, bytes):
        real_ip = real_ip.decode()
    proxy = f'http://{real_ip}:{port}'
    proxyAuth = 'Basic ' + urlsafe_b64encode(bytes((user + ':' + password), 'ascii')).decode('utf8')
    # The authority part is the proxy's own address, not the password.
    full_proxy = f'http://{user}:{password}@{real_ip}:{port}'
    # NOTE(review): these keys end in '://', unlike the 'http' / 'https'
    # keys returned by fetch_proxy_by_service — confirm which HTTP client
    # consumes this mapping before unifying them.
    proxy_request_format = {
        'http://': full_proxy,
        'https://': full_proxy
    }
    return proxy, real_ip, proxyAuth, proxy_request_format
# -*- coding: utf-8 -*-
# @ModuleName: redis_utils
# @Function:
# @Author: 安辉
# @Time: 2022/1/18 4:31 下午
import re
import sys
import redis
# Redis settings are selected by platform: Windows / macOS use the
# development server, everything else uses the hosted instance.
# NOTE(review): the passwords below are stored in source control.
if sys.platform in ['win32', 'darwin']:
    REDIS_HOST = '39.99.32.199'
    REDIS_PORT = 6379
    REDIS_PARAMS = {
        'password': 'reyun_adi',
    }
else:
    REDIS_HOST = 'r-hp3d05tt4mpw03sw4u.redis.huhehaote.rds.aliyuncs.com'
    REDIS_PORT = 6379
    REDIS_PARAMS = {
        'password': 'Reyun_adi_redis',
    }
REDIS_PASS = REDIS_PARAMS['password']
REDIS_DB = 0

# Dotted-quad IPv4 address anywhere in a string (each octet 0-255).
_IPV4_PATTERN = re.compile(
    r'((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))'
)


class RedisUtils(object):
    """Small helper around one Redis connection pool for a single database."""

    def __init__(self, db=REDIS_DB):
        """Create the pool and client for logical database ``db``.

        Responses are decoded to ``str`` (``decode_responses=True``).
        """
        self.pool = redis.ConnectionPool(host=REDIS_HOST, port=REDIS_PORT, db=db, password=REDIS_PASS,
                                         decode_responses=True)
        self.conn = redis.Redis(connection_pool=self.pool)

    def redis_sadd(self, key, value):
        """Add ``value`` to the set stored at ``key``."""
        self.conn.sadd(key, value)

    def redis_hvals(self, key):
        """Return all values of the hash stored at ``key``."""
        return self.conn.hvals(key)

    def get_real_ip(self, proxy: str):
        """Return the outbound IP associated with a proxy URL.

        If the URL itself contains an IPv4 address, that address is returned
        without touching Redis. Otherwise the ``proxy:real_ip`` hash is
        consulted with the URL as the field.

        Args:
            proxy: proxy URL, possibly including credentials.

        Returns:
            The IP as a string, or ``''`` when it is unknown or the lookup
            fails.
        """
        match = _IPV4_PATTERN.search(proxy)
        if match:
            return match.group(1)
        try:
            real_ip = self.conn.hget('proxy:real_ip', proxy)
        except Exception:
            # Best effort: an unavailable cache means "unknown", not a crash.
            return ''
        if real_ip is None:
            return ''
        # The pool decodes responses, so this is normally already a str;
        # bytes are still accepted in case a non-decoding client is used.
        if isinstance(real_ip, bytes):
            real_ip = real_ip.decode('utf-8')
        return real_ip
# Shared connections created at import time: database 3 is read by
# fetch_proxy_by_vps(), database 0 by fetch_proxy_by_service().
redis_db3_conn = RedisUtils(3)
redis_db0_conn = RedisUtils(0)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment