瀏覽代碼

1. 链家成交价抓取接口修改
2. 新增 htmlSend2kps 方法,用于调用 kps 的抓取接口

GouGengquan 2 月之前
父節點
當前提交
ccef50f5c3

+ 51 - 39
src/main/java/com/leeroa/dydb/datasource/lianjia/service/impl/DataLianjiaServiceImpl.java

@@ -201,51 +201,63 @@ public class DataLianjiaServiceImpl implements DataLianjiaService, BeanWrapCallb
     public int remoteFetch(LianjiaParams params, String batchNO) {
         String code = StringUtils.emptyDefault(params.getBizCode(), params.getAreaCode());
         Assert.hasText(code, "行政区域编码或者商圈编码不能为空!");
-        LianjiaResponse response = LianjiaUtils.fetch(params);
+
         int records = 0;
-        int total = response.getTotal();
-        int repeatTimes = 0;
-        // -1 表示上次发生了异常。重新执行5次,直到取到正确值
-        while (total == -1 && repeatTimes < 5) {
-            repeatTimes++;
-            response = LianjiaUtils.fetch(params);
-            total = response.getTotal();
-        }
-        Logger logger = Logger.getLogger(DataLianjiaServiceImpl.class);
-        logger.info(String.format("区域/商圈(%s)共有%s条数据可以进行抓取!当前页码:%s", code, total, params.getPage()));
-        if (total < 1) {
-            return records;
-        }
-        final Session session = HibernateUtils.getSession();
 
-        List<DataLianjia> data = response.getData();
-        if (data == null || data.isEmpty()) {
-            return records;
-        }
-        List<String> uniqueHouse = new ArrayList<>();
-        for (DataLianjia d : data) {
-            Double dealPrice = d.getDealPrice();
-            if ("cd".equals(params.getCityCode()) && dealPrice < 5000d){
-                continue;
+        // 模拟翻页
+        int startPage = params.getPage();
+        int endPage = params.getEndPage();
+        for (int i = startPage; i <= endPage; i++) {
+            // 读取每一页的链家数据
+            LianjiaResponse response = LianjiaUtils.fetch(params);
+            // total为空说明没数据,直接退出
+            if (response.getTotal() == null) {
+                break;
             }
-            if (!"cd".equals(params.getCityCode()) && dealPrice < 1000d){
-                continue;
+            int total = response.getTotal();
+            int repeatTimes = 0;
+            // -1 表示上次发生了异常。重新执行5次,直到取到正确值
+            while (total == -1 && repeatTimes < 5) {
+                repeatTimes++;
+                response = LianjiaUtils.fetch(params);
+                total = response.getTotal();
             }
-            records++;
-            d.setBatchNO(batchNO);
-            d.setProvinceName("四川");
-            d.setCityName(dataLianjiaCodeDao.getCityNameByCode(params.getCityCode()));
-            d.setCityCode(params.getCityCode());
-            if (StringUtils.isEmpty(d.getAreaName())) {
-                d.setAreaName(params.getAreaName());
+            Logger logger = Logger.getLogger(DataLianjiaServiceImpl.class);
+            logger.info(String.format("区域/商圈(%s)共有%s条数据可以进行抓取!当前页码:%s", code, total, params.getPage()));
+            if (total < 1) {
+                return records;
             }
-            if (StringUtils.isEmpty(d.getBizName())) {
-                d.setBizName(params.getBizName());
+            final Session session = HibernateUtils.getSession();
+
+            List<DataLianjia> data = response.getData();
+            if (data == null || data.isEmpty()) {
+                return records;
             }
-            session.save(d);
-            if (!uniqueHouse.contains(d.getHouses())){
-                updateCommunityPrice(d);
-                uniqueHouse.add(d.getHouses());
+            List<String> uniqueHouse = new ArrayList<>();
+            for (DataLianjia d : data) {
+                Double dealPrice = d.getDealPrice();
+                if ("cd".equals(params.getCityCode()) && dealPrice < 5000d){
+                    continue;
+                }
+                if (!"cd".equals(params.getCityCode()) && dealPrice < 1000d){
+                    continue;
+                }
+                records++;
+                d.setBatchNO(batchNO);
+                d.setProvinceName("四川");
+                d.setCityName(dataLianjiaCodeDao.getCityNameByCode(params.getCityCode()));
+                d.setCityCode(params.getCityCode());
+                if (StringUtils.isEmpty(d.getAreaName())) {
+                    d.setAreaName(params.getAreaName());
+                }
+                if (StringUtils.isEmpty(d.getBizName())) {
+                    d.setBizName(params.getBizName());
+                }
+                session.save(d);
+                if (!uniqueHouse.contains(d.getHouses())){
+                    updateCommunityPrice(d);
+                    uniqueHouse.add(d.getHouses());
+                }
             }
         }
         return records;

+ 3 - 1
src/main/java/com/leeroa/dydb/datasource/paimai/utils/LianjiaUtils.java

@@ -140,7 +140,9 @@ public class LianjiaUtils {
         response.setData(houseList);
 
         // 获取某一页的数据
-        String html = SpiderUtils.html(url.toString(),params.getHomeLinkCookie());
+        // String html = SpiderUtils.html(url.toString(),params.getHomeLinkCookie());
+        // 换成kps的爬虫方法
+        String html = SpiderUtils.htmlSend2kps(url.toString(),params.getHomeLinkCookie());
         if (html == null) {
             return response;
         }

文件差異過大導致無法顯示
+ 51 - 0
src/main/java/com/leeroa/dydb/datasource/utils/SpiderUtils.java