Jelajahi Sumber

1.八爪鱼脚本抓取的链家成交数据导入,导入时查重

GouGengquan 2 bulan lalu
induk
melakukan
db8fff8622

+ 5 - 0
src/main/java/com/leeroa/dydb/datasource/lianjia/domain/DataLianjia.java

@@ -32,9 +32,11 @@ public class DataLianjia extends Location implements AttachmentSymbol {
     @ApiField(value = "城市码")
     @Column
     private String cityCode;
+
     @ApiField(value = "链家的房屋ID")
     @Column(length = 20)
     private String houseNO;
+
     @ApiField(value = "挂牌日期")
     @Column(name = "up_date")
     private Date upDate;
@@ -46,6 +48,7 @@ public class DataLianjia extends Location implements AttachmentSymbol {
     @ApiField(value = "成交价格")
     @Column(name = "dealPrice")
     private Double dealPrice;
+
     @ApiField(value = "挂牌价格", desc = "万元")
     @Column
     private Double upPrice;
@@ -69,9 +72,11 @@ public class DataLianjia extends Location implements AttachmentSymbol {
     @ApiField(value = "梯户比")
     @Column(length = 12)
     private String stairRate;
+
     @ApiField(value = "总楼层")
     @Column(name = "floors")
     private Integer floors;
+
     @ApiField(value = "楼层信息")
     @Column(length = 20)
     private String floorInfo;

+ 16 - 0
src/main/java/com/leeroa/dydb/datasource/lianjia/dto/DataLianjiaDTO.java

@@ -10,51 +10,67 @@ import java.util.Date;
  */
 @ImportConfig(file = "", startRow = 1)
 public class DataLianjiaDTO implements DTO {
+
     // 挂牌日期
     @Col(index=0)
     private Date upDate;
+
     // 成交日期
     @Col(index=1)
     private Date dealDate;
+
     // 成交价格
     @Col(index=2)
     private Double dealPrice;
+
     // 成交总价
     @Col(index=3)
     private Double dealMoney;
+
     // 建筑面积
     @Col(index=4)
     private Double buildArea;
+
     // 套内面积
     @Col(index=5)
     private Double roomArea;
+
     // 总楼层
     @Col(index=6)
     private Integer floors;
+
     // 所在楼层
     @Col(index=7)
     private String onFloor;
+
     // 建筑类型
     @Col(index=8)
     private String buildType;
+
     // 建筑结构
     @Col(index=9)
     private String buildStructure;
+
     // 房屋朝向
     @Col(index=10)
     private String orientation;
+
     // 建筑年代
     @Col(index=11)
     private Integer buildYear;
+
     // 装修情况
     @Col(index=12)
     private String decorate;
+
     // 产权年限
     @Col(index=13)
     private String limitYear;
+
     // 电梯
     @Col(index=14)
     private String lift;
+
     // 房屋用途
     @Col(index=15)
     private String houseUsage;

+ 117 - 0
src/main/java/com/leeroa/dydb/datasource/lianjia/dto/DataLianjiaImportDTO.java

@@ -0,0 +1,117 @@
+package com.leeroa.dydb.datasource.lianjia.dto;
+
+import com.michael.poi.annotation.Col;
+import com.michael.poi.annotation.ImportConfig;
+import com.michael.poi.core.DTO;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Import DTO for Lianjia deal data scraped with the Bazhuayu (Octoparse) crawler.
+ *
+ * Each field is bound to one import column through {@code @Col(index = n)};
+ * NOTE(review): the index is presumably the zero-based column of the imported sheet,
+ * and {@code startRow = 1} presumably skips a header row — confirm against the
+ * com.michael.poi import library.
+ *
+ * @author GouGengquan
+ */
+@Data
+@ImportConfig(file = "", startRow = 1)
+public class DataLianjiaImportDTO implements DTO {
+
+    /**
+     * Province name.
+     */
+    @Col(index = 0)
+    private String provinceName;
+
+    /**
+     * City code (corresponds to the third-level domain of the Lianjia page URL).
+     * Example: for https://cd.lianjia.com/chengjiao/qingyang/
+     * the city code is cd.
+     */
+    @Col(index = 1)
+    private String cityCode;
+
+    /**
+     * City name.
+     */
+    @Col(index = 2)
+    private String cityName;
+
+    /**
+     * District name.
+     */
+    @Col(index = 3)
+    private String areaName;
+
+    /**
+     * Residential community / development name.
+     */
+    @Col(index = 4)
+    private String houses;
+
+    /**
+     * Unit layout (room type).
+     */
+    @Col(index = 5)
+    private String roomType;
+
+    /**
+     * Building (gross floor) area.
+     */
+    @Col(index = 6)
+    private Double buildArea;
+
+    /**
+     * Orientation of the unit.
+     */
+    @Col(index = 7)
+    private String orientation;
+
+    /**
+     * Decoration / renovation status.
+     */
+    @Col(index = 8)
+    private String decorate;
+
+    /**
+     * Floor the unit is on.
+     */
+    @Col(index = 9)
+    private String onFloor;
+
+    /**
+     * Building type.
+     */
+    @Col(index = 10)
+    private String buildType;
+
+    /**
+     * Listing price.
+     */
+    @Col(index = 11)
+    private Double upPrice;
+
+    /**
+     * Deal (transaction) date.
+     */
+    @Col(index = 12)
+    private Date dealDate;
+
+    /**
+     * Total deal price.
+     */
+    @Col(index = 13)
+    private Double dealMoney;
+
+    /**
+     * Deal price (unit price, corresponding to Lianjia's price per square meter).
+     */
+    @Col(index = 14)
+    private Double dealPrice;
+
+    /**
+     * URL of the corresponding Lianjia detail page.
+     */
+    @Col(index = 15)
+    private String url;
+
+}

+ 16 - 10
src/main/java/com/leeroa/dydb/datasource/lianjia/service/impl/DataLianjiaServiceImpl.java

@@ -5,7 +5,6 @@ import com.leeroa.base.attachment.utils.AttachmentHolder;
 import com.leeroa.base.attachment.vo.AttachmentVo;
 import com.leeroa.base.log.LogHelper;
 import com.leeroa.base.parameter.service.ParameterContainer;
-import com.leeroa.dydb.base.domain.Location;
 import com.leeroa.dydb.datasource.HouseCache;
 import com.leeroa.dydb.datasource.lianjia.bo.DataLianjiaBo;
 import com.leeroa.dydb.datasource.lianjia.bo.LianjiaParams;
@@ -13,10 +12,7 @@ import com.leeroa.dydb.datasource.lianjia.cache.LianjiaCache;
 import com.leeroa.dydb.datasource.lianjia.dao.DataLianjiaCodeDao;
 import com.leeroa.dydb.datasource.lianjia.dao.DataLianjiaDao;
 import com.leeroa.dydb.datasource.lianjia.domain.DataLianjia;
-import com.leeroa.dydb.datasource.lianjia.domain.DataLianjiaCode;
-import com.leeroa.dydb.datasource.lianjia.domain.DataLianjiaUp;
-import com.leeroa.dydb.datasource.lianjia.dto.DataLianjiaDTO;
-import com.leeroa.dydb.datasource.lianjia.service.DataLianjiaCodeService;
+import com.leeroa.dydb.datasource.lianjia.dto.DataLianjiaImportDTO;
 import com.leeroa.dydb.datasource.lianjia.service.DataLianjiaService;
 import com.leeroa.dydb.datasource.lianjia.vo.DataLianjiaVo;
 import com.leeroa.dydb.datasource.paimai.utils.LianjiaResponse;
@@ -63,7 +59,6 @@ import javax.annotation.Resource;
 import java.io.File;
 import java.io.IOException;
 import java.util.*;
-import java.util.stream.Collectors;
 
 /**
  * @author Michael
@@ -152,7 +147,7 @@ public class DataLianjiaServiceImpl implements DataLianjiaService, BeanWrapCallb
         logger.info("初始化导入引擎....");
 
         // 初始化引擎
-        Configuration configuration = new AnnotationCfgAdapter(DataLianjiaDTO.class).parse();
+        Configuration configuration = new AnnotationCfgAdapter(DataLianjiaImportDTO.class).parse();
         configuration.setStartRow(1);
 
         for (String id : attachmentIds) {
@@ -168,16 +163,27 @@ public class DataLianjiaServiceImpl implements DataLianjiaService, BeanWrapCallb
                 e.printStackTrace();
             }
             configuration.setPath(newFilePath);
-            configuration.setHandler(new Handler<DataLianjiaDTO>() {
+            configuration.setHandler(new Handler<DataLianjiaImportDTO>() {
                 @Override
-                public void execute(DataLianjiaDTO dto) {
+                public void execute(DataLianjiaImportDTO dto) {
                     Context context = RuntimeContext.get();
                     DataLianjia dataLianjia = new DataLianjia();
                     BeanUtils.copyProperties(dto, dataLianjia);
                     if (BeanCopyUtils.isEmpty(dataLianjia)) {
                         return;
                     }
-                    session.save(dataLianjia);
+                    // Look up an already stored record with the same community/development name,
+                    // deal date and total deal price; a hit means this row is a duplicate.
+                    // Only existence matters, so the query is capped at one row: without the cap,
+                    // uniqueResult() throws NonUniqueResultException as soon as several stored
+                    // rows share this key, which would abort the whole import.
+                    // NOTE(review): Restrictions.eq with a null value presumably never matches,
+                    // so rows missing one of the three key fields are not de-duplicated — confirm.
+                    Criteria criteria = session.createCriteria(DataLianjia.class)
+                            .add(Restrictions.eq("houses", dataLianjia.getHouses()))
+                            .add(Restrictions.eq("dealDate", dataLianjia.getDealDate()))
+                            .add(Restrictions.eq("dealMoney", dataLianjia.getDealMoney()))
+                            .setMaxResults(1);
+                    // A non-null result means a matching record already exists.
+                    boolean exists = criteria.uniqueResult() != null;
+                    if (!exists) { // not a duplicate: insert the new record
+                        session.save(dataLianjia);
+                    } else { // duplicate: skip the row and only log it
+                        logger.info(String.format("重复的成交数据!小区/楼盘名称:%s-成交日期:%s-成交总价:%f万,该条数据已跳过!",dataLianjia.getHouses(),dataLianjia.getDealDate(),dataLianjia.getDealMoney()));
+                    }
                     if (context.getRowIndex() % 10 == 0) {
                         session.flush();
                         session.clear();

+ 2 - 2
src/main/webapp/app/dydb/data/dataLianjia/dataLianjia_import.jsp

@@ -35,8 +35,8 @@
                         </div>
                     </div>
                     <div class="button-row">
-                        <a class="btn" ng-href="<%=contextPath%>/dydb/data/dataLianjia/template" target="_blank"
-                           style="width: 160px;height: 50px;line-height: 50px;">下载数据模板</a>
+<%--                        <a class="btn" ng-href="<%=contextPath%>/dydb/data/dataLianjia/template" target="_blank"--%>
+<%--                           style="width: 160px;height: 50px;line-height: 50px;">下载数据模板</a>--%>
                         <button class="btn" ng-click="importData();" ng-disabled="!canImport"
                                 style="margin-left:80px;width: 150px;">执行导入
                         </button>

+ 9 - 1
src/main/webapp/app/dydb/data/dataLianjia/dataLianjia_list.js

@@ -194,6 +194,14 @@
             window.open(url);
         };
 
+        // 导入数据
+        $scope.importData = function () {
+            CommonUtils.addTab({
+                title: '导入链家数据',
+                url: '/dydb/data/dataLianjia/import',
+                onUpdate: $scope.query
+            });
+        };
 
         // 导出数据
         $scope.exportData = function () {
@@ -202,7 +210,7 @@
                 return;
             }
             if ($scope.pager.total > 20000) {
-                AlertFactory.error('要导出的数据过大,请进行选后再进行导出!');
+                AlertFactory.error('要导出的数据过大,请进行选后再进行导出!');
                 return;
             }
             var o = angular.extend({}, $scope.condition);

+ 3 - 0
src/main/webapp/app/dydb/data/dataLianjia/dataLianjia_list.jsp

@@ -156,6 +156,9 @@
                     <a type="button" class="btn btn-blue" ng-click="exportData();" ng-disabled="!pager.total" ng-cloak>
                         <i class="glyphicon glyphicon-export"></i> 导出数据
                     </a>
+                    <a type="button" class="btn btn-blue" ng-click="importData();" ng-cloak>
+                        <i class="glyphicon glyphicon-import"></i> 导入数据
+                    </a>
                     <a type="button" class="btn btn-blue" ng-click="remove();" ng-disabled="!anyone" ng-cloak>
                         <i class="glyphicon glyphicon-trash"></i> 删除
                     </a>