首页 > 代码库 > 从国家统计局采集最新行政区划分

从国家统计局采集最新行政区划分

  最近一个新项目要用到国内行政区划数据,bing了一下,已有网友提供SQL版本的数据下载,但下载到本地查看后发现数据不够新:至少我老家所在的市2010年就改名了,这份数据里却看不到。所以还是自己动手,丰衣足食。本文使用了JSON.NET。

  1、采集HTML用的Http辅助类:

   

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace DivisonsOfPRC
{
    /// <summary>
    /// Minimal synchronous HTTP helper.
    /// Demo code only; not recommended for production use (look at HttpClient instead).
    /// </summary>
    public class Http
    {
        /// <summary>
        /// Sends a GET request to <paramref name="url"/> and returns the response body
        /// decoded as UTF-8.
        /// </summary>
        /// <param name="url">The URL to request.</param>
        /// <returns>The complete response body as a string.</returns>
        /// <remarks>
        /// Nothing is caught here: any exception raised while sending the request or
        /// reading the response propagates to the caller.
        /// </remarks>
        public string GET(string url)
        {
            HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);
            hwr.Method = "GET";
            hwr.CookieContainer = new CookieContainer();
            hwr.Accept = "*/*";
            hwr.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
            hwr.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN");
            hwr.Referer = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/";
            hwr.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)";
            hwr.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

            // The using statements guarantee that the response, its stream and the reader
            // are released even when reading the body throws.
            using (HttpWebResponse response = (HttpWebResponse)hwr.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader readStream = new StreamReader(receiveStream, Encoding.UTF8))
            {
                return readStream.ReadToEnd();
            }
        }
    }
}

 

使用JSON.NET生成JSON

// NOTE(review): this listing repeats the Http class from the first listing on this page,
// although its heading introduces the JSON.NET step. The Node type used by Program is not
// defined in any listing visible here.
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace DivisonsOfPRC
{
    /// <summary>
    /// Minimal synchronous HTTP helper.
    /// Demo code only; not recommended for production use (look at HttpClient instead).
    /// </summary>
    public class Http
    {
        /// <summary>
        /// Sends a GET request to <paramref name="url"/> and returns the response body
        /// decoded as UTF-8.
        /// </summary>
        /// <param name="url">The URL to request.</param>
        /// <returns>The complete response body as a string.</returns>
        /// <remarks>
        /// Nothing is caught here: any exception raised while sending the request or
        /// reading the response propagates to the caller.
        /// </remarks>
        public string GET(string url)
        {
            HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);
            hwr.Method = "GET";
            hwr.CookieContainer = new CookieContainer();
            hwr.Accept = "*/*";
            hwr.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
            hwr.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN");
            hwr.Referer = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/";
            hwr.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)";
            hwr.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

            // The using statements guarantee that the response, its stream and the reader
            // are released even when reading the body throws.
            using (HttpWebResponse response = (HttpWebResponse)hwr.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader readStream = new StreamReader(receiveStream, Encoding.UTF8))
            {
                return readStream.ReadToEnd();
            }
        }
    }
}

测试用例:

 

using System;
using System.Collections.Generic;
using System.Text;
using Newtonsoft.Json;

namespace DivisonsOfPRC
{
    /// <summary>
    /// Console entry point: downloads the administrative-division page, parses its
    /// paragraphs into a tree of <c>Node</c> objects and writes that tree as JSON to
    /// "data.js" in the application's base directory.
    /// </summary>
    class Program
    {
        /// <summary>Number of "&amp;nbsp;" entities that marks a top-level (province) row.</summary>
        private const int ProvinceSpaces = 3;

        /// <summary>Text that immediately precedes the payload of each paragraph.</summary>
        private const string ParagraphPrefix = "align=\"justify\">";

        static void Main(string[] args)
        {
            Http http = new Http();

            // 1. Download the HTML.
            string html = http.GET("http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201401/t20140116_501070.html");

            // 2. Analyse the HTML. HtmlAgilityPack could locate the core markup; plain
            //    string slicing is used here to keep the demo simple.
            string fragment = ExtractDivisionFragment(html);
            string[] lines = fragment.Split(new string[] { "</p>" }, StringSplitOptions.RemoveEmptyEntries);
            List<Node> nodes = BuildTree(lines);

            // 3. Serialize, leaving out null members (for example nodes without children).
            JsonSerializerSettings settings = new JsonSerializerSettings();
            settings.NullValueHandling = NullValueHandling.Ignore;
            string json2 = JsonConvert.SerializeObject(nodes, Newtonsoft.Json.Formatting.None, settings);
            string outputPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "data.js");
            System.IO.File.WriteAllText(outputPath, json2, Encoding.UTF8);
        }

        /// <summary>
        /// Cuts out the part of the page that starts at the first "&lt;p class=" after the
        /// last "TRS_Editor" marker and ends just before the following "&lt;/div&gt;".
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// A marker is missing, i.e. the page layout is not the expected one.
        /// </exception>
        private static string ExtractDivisionFragment(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                throw new InvalidOperationException("The downloaded page is empty.");
            }

            int startIndex = html.LastIndexOf("TRS_Editor", StringComparison.Ordinal);
            if (startIndex != -1)
            {
                startIndex = html.IndexOf("<p class=\"", startIndex, StringComparison.Ordinal);
            }
            if (startIndex == -1)
            {
                throw new InvalidOperationException("Could not locate the start of the division list in the page.");
            }

            int endIndex = html.IndexOf("</div>", startIndex, StringComparison.Ordinal);
            if (endIndex == -1)
            {
                throw new InvalidOperationException("Could not locate the end of the division list in the page.");
            }

            return html.Substring(startIndex, endIndex - startIndex);
        }

        /// <summary>
        /// Converts the paragraph fragments into a tree. Rows with exactly
        /// <see cref="ProvinceSpaces"/> "&amp;nbsp;" entities become top-level nodes; deeper
        /// rows are attached to the most recent province or, one level further down, to
        /// the most recent child of that province.
        /// </summary>
        private static List<Node> BuildTree(string[] lines)
        {
            List<Node> nodes = new List<Node>();
            Node prevProvince = null;
            Node prevCity = null;

            for (int i = 0; i < lines.Length; i++)
            {
                string line = ExtractHtml(lines[i], ParagraphPrefix, "");
                if (string.IsNullOrEmpty(line))
                {
                    // Whitespace-only tail after the last </p>, or a paragraph without the prefix.
                    continue;
                }

                // The code is everything before the first entity; the name follows the last one.
                int codeEnd = line.IndexOf('&');
                if (codeEnd == -1)
                {
                    continue;
                }

                Node nod = new Node();
                nod.code = line.Substring(0, codeEnd);
                nod.name = line.Substring(line.LastIndexOf(';') + 1).Trim();
                nod.spaces = CountString(line, "&nbsp;");

                if (nod.spaces == ProvinceSpaces)
                {
                    nodes.Add(nod);
                    prevProvince = nod;
                    prevCity = null;
                    continue;
                }

                // Rows seen before any province, or not indented deeper than the current
                // province, have no parent to attach to and are skipped.
                if (prevProvince == null || nod.spaces <= prevProvince.spaces)
                {
                    continue;
                }

                if (prevCity != null && nod.spaces > prevCity.spaces)
                {
                    // One level below the current city.
                    AddChild(prevCity, nod);
                }
                else
                {
                    AddChild(prevProvince, nod);
                    prevCity = nod;
                }
            }

            return nodes;
        }

        /// <summary>Appends <paramref name="child"/> to the parent's list, creating the list on first use.</summary>
        private static void AddChild(Node parent, Node child)
        {
            if (parent.cell == null)
            {
                parent.cell = new List<Node>();
            }
            parent.cell.Add(child);
        }

        /// <summary>
        /// Returns the text of <paramref name="source"/> located after the first occurrence
        /// of <paramref name="prefix"/> and, when <paramref name="suffix"/> is non-empty,
        /// before the next occurrence of the suffix.
        /// </summary>
        /// <returns>
        /// null when <paramref name="source"/> is null or empty; an empty string when the
        /// prefix or a requested suffix is not found; otherwise the extracted text.
        /// </returns>
        private static string ExtractHtml(string source, string prefix, string suffix)
        {
            if (string.IsNullOrEmpty(source))
            {
                return null;
            }

            int startIndex = source.IndexOf(prefix, StringComparison.Ordinal);
            if (startIndex == -1)
            {
                return string.Empty;
            }
            startIndex += prefix.Length;

            int endIndex = source.Length;
            if (!string.IsNullOrEmpty(suffix))
            {
                endIndex = source.IndexOf(suffix, startIndex, StringComparison.Ordinal);
                if (endIndex == -1)
                {
                    // Suffix not found.
                    return string.Empty;
                }
            }

            return source.Substring(startIndex, endIndex - startIndex);
        }

        /// <summary>
        /// Counts the non-overlapping occurrences of <paramref name="search"/> in
        /// <paramref name="source"/>.
        /// </summary>
        /// <returns>The number of occurrences; 0 when either argument is null or empty.</returns>
        private static int CountString(string source, string search)
        {
            // An empty search string matches at every index and would never let the loop end.
            if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(search))
            {
                return 0;
            }

            int count = 0;
            int index = source.IndexOf(search, StringComparison.Ordinal);
            while (index != -1)
            {
                count++;
                index = source.IndexOf(search, index + search.Length, StringComparison.Ordinal);
            }
            return count;
        }
    }
}

  

 

留下备用

从国家统计局采集最新行政区划分