Commit 323fc3f8 by zhangxiaoyan

reyun pkg match

parent 6977b318
...@@ -3,10 +3,15 @@ package common.repository; ...@@ -3,10 +3,15 @@ package common.repository;
import common.model.AppInfo; import common.model.AppInfo;
import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor; import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
import org.springframework.data.jpa.repository.Query;
import java.util.List;
/** /**
* Created by zxy on 2017/12/26. * Created by zxy on 2017/12/26.
*/ */
public interface AppInfoRepository extends JpaRepository<AppInfo, Long>, JpaSpecificationExecutor<AppInfo> public interface AppInfoRepository extends JpaRepository<AppInfo, Long>, JpaSpecificationExecutor<AppInfo>
{ {
/**
 * Loads every stored app that is not yet flagged as a Reyun app, i.e. whose
 * {@code reyun} column is {@code 0} or {@code null}.
 *
 * <p>The query selects the entity itself rather than a list of individual columns:
 * a multi-column JPQL select produces {@code Object[]} rows, which cannot be
 * iterated as {@link AppInfo} nor modified and passed back to {@code save(...)}.
 *
 * @return the matching {@link AppInfo} entities; empty when none match
 */
@Query(value = "select a from AppInfo a where a.reyun = 0 or a.reyun is null")
List<AppInfo> findAppListByReyun();
} }
...@@ -9,11 +9,16 @@ import common.repository.CityRepository; ...@@ -9,11 +9,16 @@ import common.repository.CityRepository;
import common.repository.UserRepository; import common.repository.UserRepository;
import dmp.model.TagCrawlerAppsWandoujia; import dmp.model.TagCrawlerAppsWandoujia;
import dmp.repository.TagCrawlerAppsWandoujiaRepository; import dmp.repository.TagCrawlerAppsWandoujiaRepository;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.collections.map.HashedMap;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import security.annotation.Authorization; import security.annotation.Authorization;
import tkio.model.App; import tkio.model.App;
import tkio.repository.AccountRepository; import tkio.repository.AccountRepository;
import util.Constant;
import util.DateUtil; import util.DateUtil;
import util.HttpClientUtil;
import util.StringUtil; import util.StringUtil;
import java.util.ArrayList; import java.util.ArrayList;
...@@ -38,6 +43,7 @@ public class SyncAppDataTask ...@@ -38,6 +43,7 @@ public class SyncAppDataTask
{ {
System.out.println(DateUtil.getBeforeDays(1)); System.out.println(DateUtil.getBeforeDays(1));
List<TagCrawlerAppsWandoujia> list = tagRepository.findAllByDs("2017-12-15"); List<TagCrawlerAppsWandoujia> list = tagRepository.findAllByDs("2017-12-15");
//从网上找了一个一线城市以及地级市的列表
List<String> citys = cityRepository.findCitys(); List<String> citys = cityRepository.findCitys();
List<AppInfo> appInfos = new ArrayList<>(); List<AppInfo> appInfos = new ArrayList<>();
List<AppCategory> appCategories = appCategoryRepository.findAll(); List<AppCategory> appCategories = appCategoryRepository.findAll();
...@@ -51,16 +57,25 @@ public class SyncAppDataTask ...@@ -51,16 +57,25 @@ public class SyncAppDataTask
map.put(appCategory.getLevel(), innerList); map.put(appCategory.getLevel(), innerList);
} }
List<String> ry_pkgNameList = findReyunPkgList();
List<AppCategory> newCategorys = new ArrayList<>(); List<AppCategory> newCategorys = new ArrayList<>();
List<AppInfo> newInfos = new ArrayList<>(); List<AppInfo> newInfos = new ArrayList<>();
for (TagCrawlerAppsWandoujia tag : list) { for (TagCrawlerAppsWandoujia tag : list) {
AppInfo appInfo = new AppInfo(); AppInfo appInfo = new AppInfo();
appInfo.setName(tag.getName()); appInfo.setName(tag.getName());
appInfo.setCompany(tag.getMaker()); appInfo.setCompany(tag.getMaker());
//豌豆荚爬取的都是安卓的
appInfo.setOs("Android"); appInfo.setOs("Android");
appInfo.setPkgName(tag.getPkgname()); appInfo.setPkgName(tag.getPkgname());
//判断这批app中是否有热云已有的
if (ry_pkgNameList.contains(tag.getPkgname())) {
appInfo.setReyun(1);
}
appInfo.setLogoUrl(tag.getApplogo_link()); appInfo.setLogoUrl(tag.getApplogo_link());
String types = tag.getMix_types(); String types = tag.getMix_types();
//类别按照_分割,第一个是一级类别,第二个是二级类别,第三个是三级类别,剩下到组合起来放到其他类别中
//同时将爬取的数据中的类别字典取出来
if (!StringUtil.isEmpty(types)) { if (!StringUtil.isEmpty(types)) {
String[] typeArray = types.split("_"); String[] typeArray = types.split("_");
String otherCase = ""; String otherCase = "";
...@@ -99,6 +114,7 @@ public class SyncAppDataTask ...@@ -99,6 +114,7 @@ public class SyncAppDataTask
appInfo.setOtherCate(otherCase.substring(1)); appInfo.setOtherCate(otherCase.substring(1));
} }
} }
//看公司名字中是含有城市列表中的某个,记录app的公司所在地
for (String city : citys) { for (String city : citys) {
if (tag.getMaker().indexOf(city) > -1) { if (tag.getMaker().indexOf(city) > -1) {
appInfo.setLocation(city); appInfo.setLocation(city);
...@@ -108,10 +124,39 @@ public class SyncAppDataTask ...@@ -108,10 +124,39 @@ public class SyncAppDataTask
newInfos.add(appInfo); newInfos.add(appInfo);
System.out.println(appInfo); System.out.println(appInfo);
} }
//把以前爬取的app不是热云的拿出来,看一下现在是不是热云的,是的话,更新进去。
List<AppInfo> oldNotRyAppList = appInfoRepository.findAppListByReyun();
for (AppInfo app : oldNotRyAppList) {
if (ry_pkgNameList.contains(app.getPkgName())) {
app.setReyun(1);
System.out.println(app);
newInfos.add(app);
}
}
appInfoRepository.save(newInfos); appInfoRepository.save(newInfos);
appCategoryRepository.save(newCategorys); appCategoryRepository.save(newCategorys);
} }
/**
 * Fetches the distinct package names known to Reyun from the report service.
 *
 * <p>Posts a SQL query to {@code Constant.reportUrl + "/api/manager/bysql"} and collects
 * the {@code pkgname} field of every element in the {@code val} array of the JSON response.
 * A missing/empty response, a JSON null object, a missing {@code val} array, or an element
 * without {@code pkgname} is skipped instead of throwing, so one bad response does not
 * abort the whole sync run.
 *
 * @return the package names found; an empty list when the response carries no usable data
 */
public List<String> findReyunPkgList() {
    String url = Constant.reportUrl + "/api/manager/bysql";
    String sql = "select distinct pkgname from tkio.tkio_app_pkgname where pkgname!='NULL'";

    // HashedMap is used as a raw type here, so the assignment is unchecked;
    // it is safe because only String keys and values are put below.
    @SuppressWarnings("unchecked")
    Map<String, String> conditions = new HashedMap();
    conditions.put("sql", sql);
    conditions.put("dbtype", "mysql");
    conditions.put("datatype", "list");
    conditions.put("reportname", "pkgname_list");

    List<String> pkgName = new ArrayList<>();
    String responseJson = HttpClientUtil.doHttpPostRequest(url, "manager", conditions);
    if (responseJson == null || responseJson.trim().isEmpty()) {
        System.out.println("findReyunPkgList: empty response from " + url);
        return pkgName;
    }

    JSONObject object = JSONObject.fromObject(responseJson);
    // A JSON "null" payload parses to a null object; accessing keys on it would throw.
    JSONArray array = object.isNullObject() ? null : object.optJSONArray("val");
    if (array == null) {
        System.out.println("findReyunPkgList: response has no 'val' array");
        return pkgName;
    }

    for (int i = 0; i < array.size(); i++) {
        JSONObject o = array.optJSONObject(i);
        if (o == null || !o.has("pkgname")) {
            continue;
        }
        pkgName.add(o.getString("pkgname"));
    }
    return pkgName;
}
public static void main(String[] args) public static void main(String[] args)
{ {
SyncAppDataTask task = new SyncAppDataTask(); SyncAppDataTask task = new SyncAppDataTask();
......
...@@ -22,6 +22,6 @@ ...@@ -22,6 +22,6 @@
<task:scheduled-tasks> <task:scheduled-tasks>
<!--//定时同步短链数据(每5分钟执行一次)--> <!--//定时同步短链数据(每5分钟执行一次)-->
<task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 0 08 * * ?"/> <task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 04 20 * * ?"/>
</task:scheduled-tasks> </task:scheduled-tasks>
</beans> </beans>
...@@ -22,6 +22,6 @@ ...@@ -22,6 +22,6 @@
<task:scheduled-tasks> <task:scheduled-tasks>
<!--//定时同步短链数据(每5分钟执行一次)--> <!--//定时同步短链数据(每5分钟执行一次)-->
<task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 0 08 * * ?"/> <task:scheduled ref="SyncAppDataTask" method="syncAppData" cron="0 04 20 * * ?"/>
</task:scheduled-tasks> </task:scheduled-tasks>
</beans> </beans>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment