Commit 323fc3f8 by zhangxiaoyan

reyun pkg match

parent 6977b318
......@@ -3,10 +3,15 @@ package common.repository;
import common.model.AppInfo;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
import org.springframework.data.jpa.repository.Query;
import java.util.List;
/**
 * Spring Data JPA repository for {@link AppInfo} entities.
 *
 * Created by zxy on 2017/12/26.
 */
public interface AppInfoRepository extends JpaRepository<AppInfo, Long>, JpaSpecificationExecutor<AppInfo>
{
    /**
     * Finds every app whose {@code reyun} flag is 0 or has never been set.
     *
     * <p>The query selects the entity itself rather than a list of columns: a JPQL
     * select over several individual attributes yields {@code Object[]} rows, which
     * cannot be used as {@code AppInfo} instances by callers that read or update the
     * returned objects and save them back through this repository.
     *
     * @return the matching {@link AppInfo} entities
     */
    @Query(value = "select a from AppInfo a where a.reyun = 0 or a.reyun is null")
    List<AppInfo> findAppListByReyun();
}
......@@ -9,11 +9,16 @@ import common.repository.CityRepository;
import common.repository.UserRepository;
import dmp.model.TagCrawlerAppsWandoujia;
import dmp.repository.TagCrawlerAppsWandoujiaRepository;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.collections.map.HashedMap;
import org.springframework.beans.factory.annotation.Autowired;
import security.annotation.Authorization;
import tkio.model.App;
import tkio.repository.AccountRepository;
import util.Constant;
import util.DateUtil;
import util.HttpClientUtil;
import util.StringUtil;
import java.util.ArrayList;
......@@ -38,6 +43,7 @@ public class SyncAppDataTask
{
System.out.println(DateUtil.getBeforeDays(1));
List<TagCrawlerAppsWandoujia> list = tagRepository.findAllByDs("2017-12-15");
//从网上找了一个一线城市以及地级市的列表
List<String> citys = cityRepository.findCitys();
List<AppInfo> appInfos = new ArrayList<>();
List<AppCategory> appCategories = appCategoryRepository.findAll();
......@@ -51,16 +57,25 @@ public class SyncAppDataTask
map.put(appCategory.getLevel(), innerList);
}
List<String> ry_pkgNameList = findReyunPkgList();
List<AppCategory> newCategorys = new ArrayList<>();
List<AppInfo> newInfos = new ArrayList<>();
for (TagCrawlerAppsWandoujia tag : list) {
AppInfo appInfo = new AppInfo();
appInfo.setName(tag.getName());
appInfo.setCompany(tag.getMaker());
//豌豆荚爬取的都是安卓的
appInfo.setOs("Android");
appInfo.setPkgName(tag.getPkgname());
//判断这批app中是否有热云已有的
if (ry_pkgNameList.contains(tag.getPkgname())) {
appInfo.setReyun(1);
}
appInfo.setLogoUrl(tag.getApplogo_link());
String types = tag.getMix_types();
//类别按照_分割,第一个是一级类别,第二个是二级类别,第三个是三级类别,剩下到组合起来放到其他类别中
//同时将爬取的数据中的类别字典取出来
if (!StringUtil.isEmpty(types)) {
String[] typeArray = types.split("_");
String otherCase = "";
......@@ -99,6 +114,7 @@ public class SyncAppDataTask
appInfo.setOtherCate(otherCase.substring(1));
}
}
//看公司名字中是含有城市列表中的某个,记录app的公司所在地
for (String city : citys) {
if (tag.getMaker().indexOf(city) > -1) {
appInfo.setLocation(city);
......@@ -108,10 +124,39 @@ public class SyncAppDataTask
newInfos.add(appInfo);
System.out.println(appInfo);
}
//把以前爬取的app不是热云的拿出来,看一下现在是不是热云的,是的话,更新进去。
List<AppInfo> oldNotRyAppList = appInfoRepository.findAppListByReyun();
for (AppInfo app : oldNotRyAppList) {
if (ry_pkgNameList.contains(app.getPkgName())) {
app.setReyun(1);
System.out.println(app);
newInfos.add(app);
}
}
appInfoRepository.save(newInfos);
appCategoryRepository.save(newCategorys);
}
/**
 * Fetches the distinct package names stored in {@code tkio.tkio_app_pkgname}.
 *
 * <p>The SQL statement is posted to the {@code /api/manager/bysql} endpoint under
 * {@code Constant.reportUrl}; the package names are then read from the
 * {@code "pkgname"} field of each element of the response's {@code "val"} array.
 *
 * @return the package names in the order they appear in the response
 */
public List<String> findReyunPkgList() {
    // Request parameters describing the query the report endpoint should run.
    Map<String, String> params = new HashedMap();
    params.put("sql", "select distinct pkgname from tkio.tkio_app_pkgname where pkgname!='NULL'");
    params.put("dbtype", "mysql");
    params.put("datatype", "list");
    params.put("reportname", "pkgname_list");

    String endpoint = Constant.reportUrl + "/api/manager/bysql";
    // NOTE(review): the meaning of the "manager" argument is defined by HttpClientUtil — confirm there.
    String body = HttpClientUtil.doHttpPostRequest(endpoint, "manager", params);

    // Each element of "val" is a JSON object carrying one "pkgname" value.
    JSONArray rows = JSONObject.fromObject(body).getJSONArray("val");
    List<String> result = new ArrayList<>(rows.size());
    for (int idx = 0, total = rows.size(); idx < total; idx++) {
        result.add(rows.getJSONObject(idx).getString("pkgname"));
    }
    return result;
}
public static void main(String[] args)
{
SyncAppDataTask task = new SyncAppDataTask();
......
......@@ -22,6 +22,6 @@
<task:scheduled-tasks>
<!-- Scheduled app data sync (runs once a day at the time set by the cron expression below) -->
<task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 0 08 * * ?"/>
<task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 04 20 * * ?"/>
</task:scheduled-tasks>
</beans>
......@@ -22,6 +22,6 @@
<task:scheduled-tasks>
<!-- Scheduled app data sync (runs once a day at the time set by the cron expression below) -->
<task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 0 08 * * ?"/>
<task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 04 20 * * ?"/>
</task:scheduled-tasks>
</beans>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment