首页 > 代码库 > Cric bak realestate.cei.gov.cn

Cric bak realestate.cei.gov.cn

using AnfleCrawler.Common;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for the <c>br2.aspx</c> pages of www.realestate.cei.gov.cn.
    /// <see cref="Init"/> seeds one depth-1 URL per option of the <c>#qrq</c> select,
    /// <see cref="AnalyzeInternal"/> collects the table rows of every depth-1 page into an
    /// in-memory queue, and the lander's <c>Idle</c> event handler dumps that queue to a text file.
    /// </summary>
    public class ManualAnalyzer : AnalyzerBase
    {
        /// <summary>URL template of a result page; <c>{0}</c> is the <c>rq</c> query value.</summary>
        private const string PageUrlFormat = "http://www.realestate.cei.gov.cn/traden/br2.aspx?rq={0}&lx=w6&r1=20140830";

        /// <summary><c>rq</c> value of the page that is fetched to discover all other <c>rq</c> values.</summary>
        private const string SeedRq = "20140601";

        /// <summary>File the collected rows are written to.</summary>
        private const string OutputPath = @"D:\outdict.txt";

        /// <summary>Text that marks the start of a table row in the scraped markup.</summary>
        private const string RowOpenTag = "<tr";

        /// <summary>
        /// Collected rows. Each entry is one output line: the page date followed by the cell texts.
        /// An entry holding only <see cref="Environment.NewLine"/> separates the rows of two pages.
        /// </summary>
        private readonly ConcurrentQueue<string[]> _dict = new ConcurrentQueue<string[]>();

        /// <summary>
        /// Subscribes to the lander's <c>Idle</c> event, initializes the base analyzer, then downloads
        /// the seed page and pushes one depth-1 URL for every non-blank option value of <c>#qrq</c>.
        /// </summary>
        /// <param name="crawler">Crawler this analyzer is attached to.</param>
        public override void Init(PageCrawler crawler)
        {
            crawler.Lander.Idle += Lander_Idle;
            base.Init(crawler);

            var url = new Uri(string.Format(CultureInfo.InvariantCulture, PageUrlFormat, SeedRq));
            var dom = Crawler.Lander.GetDocument(new PageContentHandler() { Url = url });
            foreach (var node in QueryNodes(dom.DocumentNode, "#qrq option"))
            {
                string val = node.GetAttributeValue("value", string.Empty);
                if (string.IsNullOrWhiteSpace(val))
                {
                    // An option without a value would produce "rq=", which AnalyzeInternal
                    // cannot parse as a date; do not queue a page that is bound to fail.
                    continue;
                }

                Crawler.PushUrl(new Uri(string.Format(CultureInfo.InvariantCulture, PageUrlFormat, val)), 1);
            }
        }

        /// <summary>
        /// Handler of the lander's <c>Idle</c> event: writes every queued row to
        /// <see cref="OutputPath"/> as one comma-joined line, UTF-8 encoded.
        /// </summary>
        /// <remarks>
        /// The file is opened with <c>append: false</c> and the queue is enumerated without being
        /// dequeued, so each invocation rewrites the file with everything collected so far.
        /// </remarks>
        private void Lander_Idle(object sender, EventArgs e)
        {
            Crawler.OutWrite("Start step2...");
            App.LogInfo("Start step2...");
            using (var writer = new System.IO.StreamWriter(OutputPath, false, Encoding.UTF8))
            {
                foreach (var set in _dict)
                {
                    writer.WriteLine(string.Join(",", set));
                }
            }
        }

        /// <summary>
        /// For a depth-1 page, reads the date from the <c>rq</c> query parameter, repairs the row
        /// markup inside <c>#str1</c> and queues one entry per <c>tr</c> (date + <c>td</c> texts),
        /// followed by a separator entry. Pages of any other depth are ignored.
        /// </summary>
        /// <param name="current">The landed page to analyze.</param>
        /// <exception cref="ArgumentNullException">The URL has no <c>rq</c> parameter.</exception>
        /// <exception cref="FormatException"><c>rq</c> is not a <c>yyyyMMdd</c> date.</exception>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            Crawler.OutWrite("*Start step1...");
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            if (current.Depth != 1)
            {
                return;
            }

            var query = System.Web.HttpUtility.ParseQueryString(current.Url.Query);
            // The value is machine-generated, so parse and format it culture-independently.
            var dt = DateTime.ParseExact(query["rq"], "yyyyMMdd", CultureInfo.InvariantCulture);
            string dateText = dt.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);

            var dom = lander.GetDocument(pHandler);
            var checkNode = QueryNode(dom.DocumentNode, "#str1");
            // NOTE(review): whether QueryNode returns null on a miss is not visible here;
            // the null guard is defensive.
            if (checkNode == null || string.IsNullOrWhiteSpace(checkNode.InnerText))
            {
                return;
            }

            checkNode.InnerHtml = CloseTableRows(checkNode.InnerHtml);
            App.LogInfo("WTF CN:{0}", checkNode.InnerHtml);

            // Count while iterating so the node query is enumerated only once.
            int rowCount = 0;
            foreach (var node in QueryNodes(checkNode, "tr"))
            {
                var x = new List<string>();
                x.Add(dateText);
                x.AddRange(QueryTexts(node, "td"));
                _dict.Enqueue(x.ToArray());
                rowCount++;
            }

            _dict.Enqueue(new string[] { Environment.NewLine });
            Crawler.OutWrite("#Stop step1 {0} {1}", dt.ToShortDateString(), rowCount);
        }

        /// <summary>
        /// Inserts a <c>&lt;/tr&gt;</c> before the second and every later <c>&lt;tr</c> in
        /// <paramref name="html"/>, leaving everything up to and including the first one untouched.
        /// </summary>
        /// <remarks>
        /// Produces the same text as <c>html.Replace("&lt;tr", "&lt;/tr&gt;&lt;tr").Substring(5)</c>
        /// when the markup starts with <c>&lt;tr</c>, but does not drop characters or throw when it
        /// starts with something else or contains no row at all.
        /// </remarks>
        /// <param name="html">Markup whose rows are assumed to be unclosed.</param>
        /// <returns>The markup with row terminators inserted.</returns>
        private static string CloseTableRows(string html)
        {
            int first = html.IndexOf(RowOpenTag, StringComparison.Ordinal);
            if (first < 0)
            {
                return html;
            }

            int afterFirst = first + RowOpenTag.Length;
            return html.Substring(0, afterFirst)
                + html.Substring(afterFirst).Replace(RowOpenTag, "</tr>" + RowOpenTag);
        }
    }
}

 

Cric bak realestate.cei.gov.cn