首页 > 代码库 > Cric bak realestate.cei.gov.cn
Cric bak realestate.cei.gov.cn
using AnfleCrawler.Common;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer that seeds one report URL per date option found on the
    /// realestate.cei.gov.cn report page, collects the table rows of every
    /// depth-1 page into an in-memory queue, and dumps the queue to a text file
    /// each time the lander raises its <c>Idle</c> event.
    /// </summary>
    public class ManualAnalyzer : AnalyzerBase
    {
        /// <summary>Report URL template; <c>{0}</c> is the value of the <c>rq</c> query parameter.</summary>
        private const string ReportUrlFormat = "http://www.realestate.cei.gov.cn/traden/br2.aspx?rq={0}&lx=w6&r1=20140830";

        /// <summary><c>rq</c> value of the page that is fetched to discover the available options.</summary>
        private const string SeedReportDate = "20140601";

        /// <summary>Format of the <c>rq</c> query parameter.</summary>
        private const string ReportDateFormat = "yyyyMMdd";

        /// <summary>File that <see cref="Lander_Idle"/> overwrites with the collected rows.</summary>
        private const string OutputPath = @"D:\outdict.txt";

        /// <summary>Stray closing tag produced in front of the first row by the markup rewrite.</summary>
        private const string LeadingRowClose = "</tr>";

        /// <summary>
        /// Collected rows. Each entry is one output line; an entry holding only
        /// <see cref="Environment.NewLine"/> is enqueued after every processed page.
        /// </summary>
        private readonly ConcurrentQueue<string[]> _dict = new ConcurrentQueue<string[]>();

        /// <summary>
        /// Subscribes to the lander's <c>Idle</c> event, runs the base initialization,
        /// then fetches the seed page and pushes one depth-1 URL per <c>#qrq option</c> value.
        /// </summary>
        /// <param name="crawler">Crawler this analyzer is attached to.</param>
        public override void Init(PageCrawler crawler)
        {
            crawler.Lander.Idle += Lander_Idle;
            base.Init(crawler);

            var seedUrl = new Uri(string.Format(ReportUrlFormat, SeedReportDate));
            var dom = Crawler.Lander.GetDocument(new PageContentHandler() { Url = seedUrl });
            foreach (var node in QueryNodes(dom.DocumentNode, "#qrq option"))
            {
                string val = node.GetAttributeValue("value", string.Empty).Trim();
                if (val.Length == 0)
                {
                    // An option without a value would queue a URL whose rq can
                    // never be parsed as a date in AnalyzeInternal.
                    continue;
                }

                // Escape the value so it cannot break out of the rq parameter.
                Crawler.PushUrl(new Uri(string.Format(ReportUrlFormat, Uri.EscapeDataString(val))), 1);
            }
        }

        /// <summary>
        /// <c>Idle</c> event handler: overwrites <see cref="OutputPath"/> (UTF-8) with every
        /// collected entry, one line per entry, fields joined by commas.
        /// </summary>
        /// <remarks>
        /// Fields are written as-is; a field that itself contains a comma is not quoted.
        /// </remarks>
        private void Lander_Idle(object sender, EventArgs e)
        {
            Crawler.OutWrite("Start step2...");
            App.LogInfo("Start step2...");
            using (var writer = new System.IO.StreamWriter(OutputPath, false, Encoding.UTF8))
            {
                foreach (var set in _dict)
                {
                    writer.WriteLine(string.Join(",", set));
                }
            }
        }

        /// <summary>
        /// Processes a landed page. Only depth-1 pages are handled: the report date is read
        /// from the <c>rq</c> query parameter and every <c>tr</c> under <c>#str1</c> is
        /// enqueued as the date followed by the texts of its <c>td</c> cells.
        /// </summary>
        /// <param name="current">Page to analyze.</param>
        /// <exception cref="ArgumentNullException">The URL has no <c>rq</c> parameter.</exception>
        /// <exception cref="FormatException">The <c>rq</c> parameter is not a <c>yyyyMMdd</c> date.</exception>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            Crawler.OutWrite("*Start step1...");
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            switch (current.Depth)
            {
                case 1:
                    {
                        var invariant = System.Globalization.CultureInfo.InvariantCulture;
                        var query = System.Web.HttpUtility.ParseQueryString(current.Url.Query);

                        // Invariant culture: rq is machine data, so it must not be read
                        // through the calendar of the current culture.
                        var dt = DateTime.ParseExact(query["rq"], ReportDateFormat, invariant);

                        var dom = lander.GetDocument(pHandler);
                        var checkNode = QueryNode(dom.DocumentNode, "#str1");
                        if (checkNode == null || string.IsNullOrWhiteSpace(checkNode.InnerText))
                        {
                            return;
                        }

                        checkNode.InnerHtml = CloseTableRows(checkNode.InnerHtml);
                        App.LogInfo("WTF CN:{0}", checkNode.InnerHtml);

                        // Count while iterating instead of calling Count() afterwards,
                        // which would run the node query a second time.
                        int rowCount = 0;
                        foreach (var node in QueryNodes(checkNode, "tr"))
                        {
                            var fields = new List<string>();
                            fields.Add(dt.ToString("yyyy-MM-dd", invariant));
                            fields.AddRange(QueryTexts(node, "td"));
                            _dict.Enqueue(fields.ToArray());
                            rowCount++;
                        }

                        _dict.Enqueue(new string[] { Environment.NewLine });
                        Crawler.OutWrite("#Stop step1 {0} {1}", dt.ToShortDateString(), rowCount);
                    }
                    break;
            }
        }

        /// <summary>
        /// Inserts a closing <c>&lt;/tr&gt;</c> in front of every <c>&lt;tr</c> and removes the
        /// stray closing tag this produces before the first row.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably the source markup opens rows without closing them - confirm
        /// against a live page. The leading tag is removed only when it is actually there, so
        /// content that is short or does not start with a row is no longer truncated.
        /// </remarks>
        private static string CloseTableRows(string html)
        {
            string rewritten = html.Replace("<tr", "</tr><tr");
            string trimmed = rewritten.TrimStart();
            return trimmed.StartsWith(LeadingRowClose, StringComparison.Ordinal)
                ? trimmed.Substring(LeadingRowClose.Length)
                : rewritten;
        }
    }
}
Cric bak realestate.cei.gov.cn
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。