首页 > 代码库 > 从国家统计局采集最新行政区划分
从国家统计局采集最新行政区划分
最近一个新项目要用到国内行政区划数据,bing了一下,已有网友提供sql版本的数据下载,但在本地查看后发现数据不够新,至少我老家所在的市2010年改名儿了,这份数据里也看不到。所以说呢,还是自己动手、丰衣足食。解析结果用JSON.NET序列化输出。
1、
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace DivisonsOfPRC
{
    /// <summary>
    /// Minimal blocking HTTP helper built on <see cref="HttpWebRequest"/>.
    /// Demo code; not recommended for production use - look into HttpClient instead.
    /// </summary>
    public class Http
    {
        /// <summary>
        /// Issues a GET request and returns the whole response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute URL of the page to download.</param>
        /// <returns>The response body as a string.</returns>
        /// <exception cref="WebException">Raised by <see cref="HttpWebRequest.GetResponse"/> when the request fails.</exception>
        public string GET(string url)
        {
            HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);
            hwr.Method = "GET";
            hwr.CookieContainer = new CookieContainer();
            hwr.Accept = "*/*";
            hwr.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
            hwr.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN");
            hwr.Referer = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/";
            hwr.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)";
            // Let the framework inflate gzip/deflate bodies so the reader sees plain text.
            hwr.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

            // using-blocks release the connection, stream and reader even when
            // reading throws; the previous manual Close() calls ran only on success.
            using (HttpWebResponse response = (HttpWebResponse)hwr.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader readStream = new StreamReader(receiveStream, Encoding.UTF8))
            {
                return readStream.ReadToEnd();
            }
        }
    }
}
生成JSON.NET
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

// NOTE(review): this listing sits under the "JSON.NET" heading but repeats the
// Http helper; the Node type used by Program (members code, name, spaces, cell)
// is not shown here - confirm against the original project.
namespace DivisonsOfPRC
{
    /// <summary>
    /// Minimal blocking HTTP helper built on <see cref="HttpWebRequest"/>.
    /// Demo code; not recommended for production use - look into HttpClient instead.
    /// </summary>
    public class Http
    {
        /// <summary>
        /// Issues a GET request and returns the whole response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute URL of the page to download.</param>
        /// <returns>The response body as a string.</returns>
        /// <exception cref="WebException">Raised by <see cref="HttpWebRequest.GetResponse"/> when the request fails.</exception>
        public string GET(string url)
        {
            HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);
            hwr.Method = "GET";
            hwr.CookieContainer = new CookieContainer();
            hwr.Accept = "*/*";
            hwr.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
            hwr.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN");
            hwr.Referer = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/";
            hwr.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)";
            // Let the framework inflate gzip/deflate bodies so the reader sees plain text.
            hwr.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

            // Dispose in reverse order of acquisition, also on the exception path,
            // so a failed read does not leave the connection open.
            using (HttpWebResponse response = (HttpWebResponse)hwr.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader readStream = new StreamReader(receiveStream, Encoding.UTF8))
            {
                string linesOfHTML = readStream.ReadToEnd();
                return linesOfHTML;
            }
        }
    }
}
测试用例:
using System;
using System.Collections.Generic;
using System.Text;
using Newtonsoft.Json;

namespace DivisonsOfPRC
{
    /// <summary>
    /// Downloads the administrative-division page, turns its paragraphs into a
    /// province / city / district tree and writes that tree to data.js as JSON.
    /// </summary>
    class Program
    {
        /// <summary>Indent-marker count that identifies a province-level row.</summary>
        const int ProvinceIndent = 3;

        /// <summary>
        /// Entry point: download, parse, serialize, save.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Http http = new Http();

            // 1. Download the page.
            string html = http.GET("http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201401/t20140116_501070.html");

            // 2. Cut out the data paragraphs. HtmlAgilityPack would be the robust
            //    choice; plain string slicing keeps the demo dependency-free.
            string fragment = ExtractDataFragment(html);
            string[] lines = fragment.Split(new string[] { "</p>" }, StringSplitOptions.RemoveEmptyEntries);

            // 3. Build the tree and serialize it, omitting null members (e.g. an empty child list).
            List<Node> nodes = BuildTree(lines);
            JsonSerializerSettings settings = new JsonSerializerSettings();
            settings.NullValueHandling = NullValueHandling.Ignore;
            string json = JsonConvert.SerializeObject(nodes, Newtonsoft.Json.Formatting.None, settings);

            string outputPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "data.js");
            System.IO.File.WriteAllText(outputPath, json, Encoding.UTF8);
        }

        /// <summary>
        /// Returns the part of the page that starts at the first data paragraph after
        /// the last "TRS_Editor" marker and ends before the next closing div.
        /// </summary>
        /// <param name="html">Full page markup.</param>
        /// <returns>The markup holding one paragraph per division.</returns>
        /// <exception cref="InvalidOperationException">A required marker is missing from the page.</exception>
        static string ExtractDataFragment(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                throw new InvalidOperationException("The downloaded page is empty.");
            }

            int startIndex = html.LastIndexOf("TRS_Editor", StringComparison.Ordinal);
            if (startIndex != -1)
            {
                startIndex = html.IndexOf("<p class=\"", startIndex, StringComparison.Ordinal);
            }
            if (startIndex == -1)
            {
                // Previously surfaced as an ArgumentOutOfRangeException from IndexOf/Substring.
                throw new InvalidOperationException("Could not locate the division data in the page; the layout may have changed.");
            }

            string fragment = html.Substring(startIndex);
            int endIndex = fragment.IndexOf("</div>", StringComparison.Ordinal);
            if (endIndex == -1)
            {
                throw new InvalidOperationException("Could not find the end of the division data in the page.");
            }

            return fragment.Substring(0, endIndex);
        }

        /// <summary>
        /// Converts the paragraph fragments into a tree. Nesting is decided by the
        /// number of indent markers in each row: <see cref="ProvinceIndent"/> means
        /// province; deeper rows attach to the current province or current city.
        /// </summary>
        /// <param name="lines">Paragraph fragments, one division per entry.</param>
        /// <returns>Province-level nodes with their descendants attached.</returns>
        static List<Node> BuildTree(string[] lines)
        {
            List<Node> nodes = new List<Node>();
            Node prevProvince = null;
            Node prevCity = null;

            foreach (string rawLine in lines)
            {
                string line = ExtractHtml(rawLine, "align=\"justify\">", "");
                if (string.IsNullOrEmpty(line))
                {
                    // Not a data paragraph (e.g. trailing whitespace after the last </p>).
                    continue;
                }

                // The code is everything before the first entity, the name everything after the last one.
                int codeEnd = line.IndexOf("&", StringComparison.Ordinal);
                if (codeEnd == -1)
                {
                    continue;
                }

                Node nod = new Node();
                nod.code = line.Substring(0, codeEnd);
                nod.name = line.Substring(line.LastIndexOf(";", StringComparison.Ordinal) + 1).Trim();
                // NOTE(review): the marker literal is kept exactly as published; given the
                // "&"/";" parsing above it may originally have been an HTML entity - confirm
                // against the live page.
                int indent = CountString(line, " ");
                nod.spaces = indent;

                if (indent == ProvinceIndent)
                {
                    nodes.Add(nod);
                    prevProvince = nod;
                    prevCity = null;
                    continue;
                }

                // Rows before the first province used to crash with a NullReferenceException;
                // rows not deeper than the province were already dropped.
                if (prevProvince == null || indent <= prevProvince.spaces)
                {
                    continue;
                }

                if (prevCity != null && indent > prevCity.spaces)
                {
                    // Deeper than the current city: district level.
                    if (prevCity.cell == null)
                    {
                        prevCity.cell = new List<Node>();
                    }
                    prevCity.cell.Add(nod);
                }
                else
                {
                    // City level.
                    if (prevProvince.cell == null)
                    {
                        prevProvince.cell = new List<Node>();
                    }
                    prevProvince.cell.Add(nod);
                    prevCity = nod;
                }
            }

            return nodes;
        }

        /// <summary>
        /// Returns the text between <paramref name="prefix"/> and <paramref name="suffix"/>.
        /// </summary>
        /// <param name="source">Text to search.</param>
        /// <param name="prefix">Marker that precedes the wanted text.</param>
        /// <param name="suffix">Marker that follows the wanted text; null or empty means "to the end of source".</param>
        /// <returns>
        /// null when <paramref name="source"/> is null or empty; an empty string when
        /// either marker is not found; otherwise the enclosed text.
        /// </returns>
        static string ExtractHtml(string source, string prefix, string suffix)
        {
            if (string.IsNullOrEmpty(source))
            {
                return null;
            }

            int startIndex = source.IndexOf(prefix, StringComparison.Ordinal);
            if (startIndex == -1)
            {
                return string.Empty;
            }
            startIndex += prefix.Length;

            int endIndex = source.Length;
            if (!string.IsNullOrEmpty(suffix))
            {
                endIndex = source.IndexOf(suffix, startIndex, StringComparison.Ordinal);
                if (endIndex == -1)
                {
                    // suffix not found
                    return string.Empty;
                }
            }

            return source.Substring(startIndex, endIndex - startIndex);
        }

        /// <summary>
        /// Counts non-overlapping occurrences of <paramref name="search"/> in <paramref name="source"/>.
        /// </summary>
        /// <param name="source">Text to scan.</param>
        /// <param name="search">Text to look for.</param>
        /// <returns>The number of occurrences; 0 when either argument is null or empty.</returns>
        static int CountString(string source, string search)
        {
            // An empty search string matches at every position without advancing,
            // which made the original loop spin forever.
            if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(search))
            {
                return 0;
            }

            int count = 0;
            int index = source.IndexOf(search, StringComparison.Ordinal);
            while (index != -1)
            {
                count++;
                index = source.IndexOf(search, index + search.Length, StringComparison.Ordinal);
            }
            return count;
        }
    }
}
留下备用
从国家统计局采集最新行政区划分
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。