首页 > 代码库 > C#使用xpath简单爬取网站的内容

C#使用xpath简单爬取网站的内容

       /// <summary>
       /// Demonstrates scraping a web page with HtmlAgilityPack and XPath: loads one
       /// hard-coded chapter URL and reads the inner text of the element whose
       /// <c>class</c> attribute is exactly <c>read-content j_readContent</c>.
       /// </summary>
       /// <remarks>
       /// The extracted text is only stored in a local variable; nothing is returned or
       /// written anywhere. The first two regions contain commented-out sample code that
       /// is kept purely as additional XPath examples.
       /// </remarks>
       /// <exception cref="System.InvalidOperationException">
       /// Thrown when the loaded page contains no element matching the content XPath.
       /// </exception>
       public static void Get()
        {
            // Example XPath for the category name nodes on the home page:
            // string xpathtrI = "//*[@id='classify-list']/dl/dd/a/cite/span/i";
            #region Fetch the categories from the home page

            //// URL of the home page
            //string urlHome = "http://www.qidian.com/";
            //HtmlWeb web = new HtmlWeb();
            //HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(urlHome);
            //// Root node of the parsed HTML document
            //HtmlNode rootNode1 = htmlDoc.DocumentNode;
            //string xpathtrA = "//*[@id='classify-list']/dl/dd";
            //HtmlNodeCollection classList = rootNode1.SelectNodes(xpathtrA);
            //List<string> listINode = new List<string>();
            //foreach (HtmlNode item in classList)
            //{
            //    // Read the category name; the leading "." keeps the search relative to
            //    // the current item (a bare "//" would search from the document root).
            //    string inode = item.SelectSingleNode(".//a/cite/span/i").InnerText;
            //    listINode.Add(inode);
            //}

            #endregion

            #region Categories and their details

            //string urlDetail = "http://xuanhuan.qidian.com/";
            //List<string> urlList = new List<string>();
            //urlList.Add("http://xuanhuan.qidian.com/");
            //urlList.Add("http://qihuan.qidian.com/");
            //urlList.Add("http://wuxia.qidian.com/");
            //urlList.Add("http://xianxia.qidian.com/");
            //urlList.Add("http://dushi.qidian.com/");
            //urlList.Add("http://zhichang.qidian.com/");
            //urlList.Add("http://junshi.qidian.com/");
            //urlList.Add("http://lishi.qidian.com/");
            //urlList.Add("http://youxi.qidian.com/");
            //urlList.Add("http://tiyu.qidian.com/");
            //urlList.Add("http://kehuan.qidian.com/");
            //urlList.Add("http://lingyi.qidian.com/");
            //foreach (string url in urlList)
            //{
            //    HtmlAgilityPack.HtmlDocument htmlDetail = web.Load(url);
            //    HtmlNode rootNode2 = htmlDetail.DocumentNode;
            //    string a = "//*[@class='book-info']";
            //    HtmlNodeCollection classList2 = rootNode2.SelectNodes(a);
            //    List<string> listINode2 = new List<string>();
            //    foreach (HtmlNode item in classList2)
            //    {
            //        // Collect the inner HTML of each matched node
            //        string inode = item.InnerHtml;
            //        listINode2.Add(inode);
            //    }
            //}

            #endregion

            #region Chapter content

            HtmlWeb web = new HtmlWeb();
            string u = "http://read.qidian.com/chapter/zOGI9RYmNdFhO--gcH8iFg2/h3iHSEH1cSpMs5iq0oQwLQ2";
            HtmlAgilityPack.HtmlDocument htmlDocment = web.Load(u);
            // Root node of the parsed HTML document
            HtmlNode htmlNode = htmlDocment.DocumentNode;
            // XPath string literals must be delimited by ASCII quotes; typographic quotes
            // are not valid XPath tokens and make the expression fail to parse.
            string x = "//*[@class='read-content j_readContent']";
            HtmlNode htmlNodeP = htmlNode.SelectSingleNode(x);
            // SelectSingleNode yields null when nothing matches (e.g. the page layout
            // changed), so fail with a descriptive error instead of a null dereference.
            if (htmlNodeP == null)
            {
                throw new System.InvalidOperationException(
                    "No element matching XPath \"" + x + "\" was found at " + u);
            }
            // Plain text of the chapter; kept in a local only, as this is sample code.
            string htmlD = htmlNodeP.InnerText;

            #endregion

        }

                 以上只是一个简单的示例：使用 HtmlAgilityPack 加载页面，再通过 XPath 选取节点并读取其文本内容。

C#使用xpath简单爬取网站的内容