博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
zlhome.com Deal
阅读量:5745 次
发布时间:2019-06-18

本文共 4379 字,大约阅读时间需要 14 分钟。

using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Page analyzer for zlhome.com. Depending on the depth of the landed page it
    /// either enqueues further URLs (listing page), saves a housing-estate record
    /// (houses page), or saves one listing record per table row (deal page).
    /// </summary>
    internal class Zlhome : AnalyzerBase
    {
        /// <summary>
        /// Minimum number of &lt;td&gt; cells a deal row must contain; the deal
        /// branch reads cell indices 0 through 8.
        /// </summary>
        private const int DealColumnCount = 9;

        /// <summary>
        /// Processes a single landed page according to <c>current.Depth</c>.
        /// </summary>
        /// <param name="current">
        /// The page being analyzed. Its <c>Url</c> is used to resolve links and, at
        /// the deal depth, its <c>State</c> is cast to the <see cref="Guid"/> of the
        /// owning houses record.
        /// </param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            // Created before the switch so it runs for every depth, matched or not.
            var pHandler = CreateContentHandler(current);

            switch (current.Depth)
            {
                case 0:
                    {
                        var dom = lander.GetDocument(pHandler);

                        // NOTE(review): presumably enqueues the remaining result pages
                        // using the last pager link — confirm against DoPerPaging.
                        DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child");

                        // Every matched link on the listing page is pushed at the houses depth.
                        foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
                        {
                            var url = GetHref(node, current.Url);
                            Crawler.PushUrl(url, DataDepth.Houses);
                        }
                    }
                    break;

                case DataDepth.Houses:
                    {
                        var dom = lander.GetDocument(pHandler);

                        var attrs = new AttributeFiller();
                        // The first ".sc a" text has the "关注" label removed before use as the name.
                        attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty));
                        attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li"));

                        Guid hashKey = GenHashKey(current.Url.OriginalString);
                        var bo = Crawler.Repository.LoadHouses(hashKey);
                        bo.SiteID = "Zlhome.com";
                        bo.PageUrl = current.Url.OriginalString;
                        bo.CityName = Crawler.Config.CityName;

                        // NOTE(review): presumably maps attribute labels found on the page
                        // to the entity's field names — confirm the direction against
                        // AttributeFiller.FillEntity. The generic arguments were missing
                        // in the original text and are restored here.
                        attrs.FillEntity(bo, new Dictionary<string, string>()
                        {
                            { "地址", "小区地址" },
                            { "所属片区", "所属区域" },
                            { "物业类型", "物业类别" },
                            { "骏工日期", "竣工时间" },
                        });

                        MapMark(bo);
                        Repository.Save(bo);
                        Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                        // The link to the deal page is the first <a> inside the second ".xqinfo" node.
                        var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip(1).First();
                        var dealNode = QueryNode(pNode, "a");
                        var url = GetHref(dealNode, current.Url);
                        Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
                    }
                    break;

                case DataDepth.Deal:
                    {
                        Guid housesID = (Guid)current.State;
                        var dom = lander.GetDocument(pHandler);

                        // Rows of the first ".cjxxtable" are saved with IsRent = false;
                        // rows of every later table are saved with IsRent = true.
                        bool isRent = false;
                        foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
                        {
                            foreach (var node in QueryNodes(table, "tr"))
                            {
                                var spans = QueryTexts(node, "td").ToArray();

                                // Skip rows that lack the cells indexed below (e.g. a header
                                // row without <td> cells) instead of failing the whole page
                                // with an IndexOutOfRangeException.
                                if (spans.Length < DealColumnCount)
                                {
                                    continue;
                                }

                                // An unparsable date is stored as null rather than rejected.
                                DateTime? transactionDate = null;
                                DateTime dump;
                                if (DateTime.TryParse(spans[0], out dump))
                                {
                                    transactionDate = dump;
                                }

                                // Cell index 2 is not read.
                                Repository.SaveHouselisting(new HouselistingEntity()
                                {
                                    HousesID = housesID,
                                    TransactionDate = transactionDate,
                                    Area = spans[1],
                                    Apartment = spans[3],
                                    Orientation = spans[4],
                                    Floor = spans[5],
                                    UnitPriceOrLease = spans[6],
                                    SoldPriceOrRent = spans[7],
                                    ServiceBroker = spans[8],
                                    IsRent = isRent
                                });
                                Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
                            }

                            isRent = true;
                        }
                    }
                    break;
            }
        }
    }
}

 

转载于:https://www.cnblogs.com/Googler/p/4272669.html

你可能感兴趣的文章
F#初学笔记06
查看>>
实战:将企业域名解析委派给企业DNS服务器
查看>>
在Lync 2013环境部署Office Web Apps
查看>>
微软大会Ignite,你准备好了么?
查看>>
读书笔记-高标管事 低调管人
查看>>
Master带给世界的思考:是“失控”还是进化
查看>>
用户和开发者不满苹果iCloud问题多多
查看>>
java.lang.UnsatisfiedLinkError:no dll in java.library.path终极解决之道
查看>>
我的工具:文本转音频文件
查看>>
【许晓笛】从零开始运行EOS系统
查看>>
【跃迁之路】【460天】程序员高效学习方法论探索系列(实验阶段217-2018.05.11)...
查看>>
C++入门读物推荐
查看>>
TiDB 源码阅读系列文章(七)基于规则的优化
查看>>
面试中会遇到的正则题
查看>>
Spring之旅第八站:Spring MVC Spittr舞台的搭建、基本的控制器、请求的输入、表单验证、测试(重点)...
查看>>
数据结构与算法——常用排序算法及其Java实现
查看>>
你所不知的Webpack-多种配置方法
查看>>
React.js 集成 Kotlin Spring Boot 开发 Web 应用实例详解
查看>>
webpack+typescript+threejs+vscode开发
查看>>
python读excel写入mysql小工具
查看>>