首页 > 代码库 > 58.com qiyi
58.com qiyi
using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for the qy.58.com enterprise directory. Depth 0 pages are
    /// listing pages (paging plus links to companies); depth 1 pages are
    /// company pages whose "basicMsg" table is parsed into a
    /// <c>CompanyEntity</c> and saved through <c>Repository</c>.
    /// </summary>
    internal class Qy58 : AnalyzerBase
    {
        /// <summary>
        /// Initializes the base analyzer and seeds the crawler with the first
        /// listing page ("pn1") of the "caohejing" section at depth 0.
        /// </summary>
        /// <param name="crawler">Crawler that receives the seed URL.</param>
        public override void Init(PageCrawler crawler)
        {
            base.Init(crawler);

            // Sample URL kept from the original source (looks like a company page; not used by the code):
            // http://qy.58.com/19583455460359/?PGTID=14177659184690.5166369006238447&ClickID=4
            var url = new Uri("http://qy.58.com/caohejing/pn1/?PGTID=14177711280840.45006677554920316&ClickID=1");
            crawler.PushUrl(url, 0);
        }

        /// <summary>
        /// Processes one landed page according to its crawl depth:
        /// depth 0 queues paging and company links, depth 1 extracts and
        /// saves a company record. Other depths are ignored.
        /// </summary>
        /// <param name="current">The page being analyzed (URL and depth).</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);

            switch (current.Depth)
            {
                case 0:
                    {
                        pHandler.AjaxBlocks.Add(HACK);
                        var dom = lander.GetDocument(pHandler);

                        // Follow the ".next" paging link, then queue every company link at depth 1.
                        DoPerPaging(current, dom.DocumentNode, ".next");
                        foreach (var node in QueryNodes(dom.DocumentNode, ".compList a"))
                        {
                            var url = GetHref(node, current.Url);
                            Crawler.PushUrl(url, 1);
                        }
                    }
                    break;

                case 1:
                    {
                        var dom = lander.GetDocument(pHandler);
                        var attr = new AttributeFiller();
                        attr.Append("Name:{0}", QueryTexts(dom.DocumentNode, ".compT").First());

                        // The first header cell is skipped, as in the original code
                        // (NOTE(review): presumably it is not a data field - confirm against the page markup).
                        foreach (var th in QueryNodes(dom.DocumentNode, ".basicMsg table th").Skip(1))
                        {
                            string sTh = th.InnerText, sTd;
                            switch (sTh)
                            {
                                case "联系电话":
                                case "邮箱":
                                    {
                                        // Phone and e-mail are published as images, so the image
                                        // is downloaded and passed through OCR.
                                        var iNode = QueryNode(th.NextSibling, "img");
                                        byte[] imgRaw;
                                        using (var client = new System.Net.WebClient())
                                        {
                                            imgRaw = client.DownloadData(GetHref(iNode, current.Url, attrName: "src"));
                                        }
                                        sTd = ReadImageText(imgRaw);
                                    }
                                    break;

                                case "公司地址":
                                    sTd = QueryTexts(th.NextSibling, "span").First();
                                    break;

                                default:
                                    sTd = th.NextSibling.InnerText.HtmlTrim();
                                    break;
                            }
                            attr.Append("{0}:{1}", sTh, sTd);
                        }

                        var bo = new CompanyEntity();
                        bo.City = "上海";
                        bo.GroupName = "漕河泾企业";
                        bo.PageUrl = current.Url.OriginalString;
                        bo.UpdateDate = DateTime.Now;

                        // Maps the table header captions to CompanyEntity property names.
                        attr.FillEntity(bo, new Dictionary<string, string>()
                        {
                            {"公司性质", "Nature"},
                            {"公司行业", "Industry"},
                            {"公司规模", "Scale"},
                            {"联系人", "ContactPerson"},
                            {"企业网址", "Website"},
                            {"联系电话", "Tel"},
                            {"邮箱", "Email"},
                            {"公司地址", "Address"},
                        });

                        Repository.SaveCompany(bo);
                        Crawler.OutWrite("保存企业 {0}", bo.Name);
                    }
                    break;
            }
        }

        /// <summary>
        /// Decodes raw image bytes into a bitmap and runs OCR on it.
        /// </summary>
        /// <param name="imageBytes">Raw bytes of the downloaded image.</param>
        /// <returns>The text produced by <c>OCR</c> for the image.</returns>
        private string ReadImageText(byte[] imageBytes)
        {
            // The stream must stay open for as long as the Bitmap is in use,
            // so both are released together once OCR has finished.
            using (var stream = new System.IO.MemoryStream(imageBytes))
            using (var img = new System.Drawing.Bitmap(stream))
            {
                return OCR(img);
            }
        }
    }
}
58.com qiyi
声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任。若内容有误或涉及侵权，可进行投诉（投诉/举报），工作人员会在5个工作日内联系你；一经查实，本站将立刻删除涉嫌侵权内容。