首页 > 代码库 > 58.com qiyi

58.com qiyi

using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for the qy.58.com company directory. Depth-0 pages are listing pages
    /// from which company links are collected; depth-1 pages are company detail pages
    /// that are parsed into a <c>CompanyEntity</c> and saved through <c>Repository</c>.
    /// </summary>
    internal class Qy58 : AnalyzerBase
    {
        /// <summary>
        /// Initializes the base analyzer and seeds the crawler with the first listing page (depth 0).
        /// </summary>
        /// <param name="crawler">Crawler that receives the seed URL.</param>
        public override void Init(PageCrawler crawler)
        {
            base.Init(crawler);
            var url = new Uri("http://qy.58.com/caohejing/pn1/?PGTID=14177711280840.45006677554920316&ClickID=1");
            // Sample URL kept from the original source (presumably a company detail page):
            // http://qy.58.com/19583455460359/?PGTID=14177659184690.5166369006238447&ClickID=4
            crawler.PushUrl(url, 0);
        }

        /// <summary>
        /// Processes one landed page according to its depth:
        /// depth 0 follows paging and queues every company link at depth 1;
        /// depth 1 extracts the company attributes and saves them.
        /// Pages at any other depth are ignored.
        /// </summary>
        /// <param name="current">The page being analyzed; its <c>Depth</c> and <c>Url</c> are read.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            switch (current.Depth)
            {
                case 0:
                    {
                        pHandler.AjaxBlocks.Add(HACK);
                        var dom = lander.GetDocument(pHandler);

                        // Let the base class handle paging via the ".next" element.
                        DoPerPaging(current, dom.DocumentNode, ".next");

                        // Queue every link found in the company list as a depth-1 page.
                        foreach (var node in QueryNodes(dom.DocumentNode, ".compList a"))
                        {
                            var url = GetHref(node, current.Url);
                            Crawler.PushUrl(url, 1);
                        }
                    }
                    break;
                case 1:
                    {
                        var dom = lander.GetDocument(pHandler);
                        var attr = new AttributeFiller();
                        attr.Append("Name:{0}", QueryTexts(dom.DocumentNode, ".compT").First());

                        // The first header cell of the table is skipped, as in the original code
                        // (NOTE(review): presumably it is not a data row - confirm against the page markup).
                        foreach (var th in QueryNodes(dom.DocumentNode, ".basicMsg table th").Skip(1))
                        {
                            string sTh = th.InnerText;
                            string sTd;
                            switch (sTh)
                            {
                                case "联系电话":
                                case "邮箱":
                                    // These values are rendered as an <img>, so the image is downloaded
                                    // and passed to OCR. WebClient, MemoryStream and Bitmap are all
                                    // IDisposable and are created once per row, so each is released
                                    // deterministically instead of being left to the finalizer.
                                    var iNode = QueryNode(th.NextSibling, "img");
                                    byte[] imgRaw;
                                    using (var client = new System.Net.WebClient())
                                    {
                                        imgRaw = client.DownloadData(GetHref(iNode, current.Url, attrName: "src"));
                                    }
                                    // The stream must stay open for the lifetime of the Bitmap, so it is
                                    // the outer using and is disposed after the bitmap.
                                    using (var imgStream = new System.IO.MemoryStream(imgRaw))
                                    using (var img = new System.Drawing.Bitmap(imgStream))
                                    {
                                        sTd = OCR(img);
                                    }
                                    break;
                                case "公司地址":
                                    // Only the first <span> text of the cell is taken for the address.
                                    sTd = QueryTexts(th.NextSibling, "span").First();
                                    break;
                                default:
                                    sTd = th.NextSibling.InnerText.HtmlTrim();
                                    break;
                            }
                            attr.Append("{0}:{1}", sTh, sTd);
                        }

                        var bo = new CompanyEntity();
                        bo.City = "上海";
                        bo.GroupName = "漕河泾企业";
                        bo.PageUrl = current.Url.OriginalString;
                        bo.UpdateDate = DateTime.Now;

                        // Map the page's row labels to CompanyEntity property names.
                        attr.FillEntity(bo, new Dictionary<string, string>()
                        {
                            {"公司性质", "Nature"},
                            {"公司行业", "Industry"},
                            {"公司规模", "Scale"},
                            {"联系人", "ContactPerson"},
                            {"企业网址", "Website"},
                            {"联系电话", "Tel"},
                            {"邮箱", "Email"},
                            {"公司地址", "Address"},
                        });
                        Repository.SaveCompany(bo);
                        Crawler.OutWrite("保存企业 {0}", bo.Name);
                    }
                    break;
            }
        }
    }
}

 

58.com qiyi