首页 > 代码库 > 小巧抓取(省、市(区号\邮编)、县)数据

小巧抓取(省、市(区号\邮编)、县)数据

最近项目需要用到城市的地址信息,但从网上下载的 xml 数据大多不是最新的,数据太老,导致有些地区不全。所以才想到天气预报官网肯定有最新最全的数据。现贴出代码,希望能帮有同样困惑的朋友节省一些时间。

	/**
	 * Looks up the postal code and telephone area code of a city by requesting
	 * the ip138.com postal search page and scanning the returned HTML line by
	 * line for the two labels.
	 * <p>
	 * The lookup is best-effort: any failure (network error, unsupported
	 * charset, ...) is reported on standard output and whatever has been
	 * collected so far is returned.
	 *
	 * @param var  city name to look up
	 * @return	two-element array: index 0 is the postal code, index 1 is the
	 *			area code; an element stays {@code null} when it was not found
	 */
	private String[] getZipCode(String var) {
		String[] code = new String[2];
		// NOTE(review): the markers are kept exactly as found in this file;
		// verify them against the markup the live page actually returns.
		String zipStart = "邮编:";
		String zipEnd = " ";
		String areaStart = "区号:";
		String areaEnd = "</td>";
		try {
			// The response is decoded as GBK below, so the query is encoded with
			// the same charset instead of the platform default that the deprecated
			// single-argument overload relies on.
			// NOTE(review): assumes the server expects a GBK-encoded query — confirm.
			String encode = URLEncoder.encode(var, "GBK");
			URL url = new URL("http://www.ip138.com/post/search.asp?area="
					+ encode + "&action=area2zone");
			BufferedReader br = new BufferedReader(new InputStreamReader(
					url.openStream(), "GBK"));
			try {
				for (String line; (line = br.readLine()) != null;) {
					String zip = extractAfterMarker(line, zipStart, zipEnd);
					if (zip != null) {
						code[0] = zip;
					}
					String area = extractAfterMarker(line, areaStart, areaEnd);
					if (area != null) {
						code[1] = area;
						// Scanning stops at the first area-code match, as before.
						break;
					}
				}
			} finally {
				// Always release the reader and the underlying connection stream.
				br.close();
			}
		} catch (Exception e) {
			System.out.println(var +"\t错误"+e.toString());
		}
		return code;
	}

	/**
	 * Returns the text that follows {@code startMarker} in {@code line}, cut
	 * off at the first {@code endMarker} found after it.
	 *
	 * @param line         line of text to search
	 * @param startMarker  label that precedes the wanted value
	 * @param endMarker    text that terminates the wanted value
	 * @return the extracted value; the remainder of the line when
	 *         {@code endMarker} does not occur after the label; or
	 *         {@code null} when {@code startMarker} is not present
	 */
	private String extractAfterMarker(String line, String startMarker, String endMarker) {
		int start = line.indexOf(startMarker);
		// indexOf reports "not found" as -1; a match at column 0 or 1 is valid.
		if (start < 0) {
			return null;
		}
		String rest = line.substring(start + startMarker.length());
		int end = rest.indexOf(endMarker);
		// Without a terminator, keep the rest of the line rather than letting
		// substring(0, -1) throw and abort the whole lookup.
		return end < 0 ? rest : rest.substring(0, end);
	}

	/**
	 * 	主程序
	 * @throws Exception
	 */
	@Test
	public void main() throws Exception
	{
		//1:获取所有省份
		TreeMap<String,String> provincesBuffer = getAddressInfo("http://www.weather.com.cn//data/city3jdata/china.html");
		Element prcEle = DocumentHelper.createElement("Provinces");
		
		//2:根据省份获取城市
		Element citysEle = DocumentHelper.createElement("Citys");
		
		//3:根据省份城市获取区、县
		Element distEle = DocumentHelper.createElement("Districts");
		int p = 1;
		int c = 1;
		int d = 1;
		for(Entry<String, String> prc : provincesBuffer.entrySet())
		{
			Element province = DocumentHelper.createElement("Province");
			province.addAttribute("ID",""+(p)).addAttribute("ProvinceName", prc.getValue()).addText(prc.getValue());
			//获取邮政编号
			TreeMap<String,String> cityBuffer = getAddressInfo("http://www.weather.com.cn/data/city3jdata/provshi/"+prc.getKey()+".html");
			for(Entry<String, String> citys : cityBuffer.entrySet())
			{
				Element city = DocumentHelper.createElement("City");
				String[] zipCode = getZipCode(citys.getValue());
				if(zipCode[0]==null||zipCode[1]==null)
					System.out.println("缺少"+citys.getValue()+"邮政或区号!");
				city.addAttribute("ID", ""+c).addAttribute("CityName", citys.getValue()).addAttribute("PID",p+"").addAttribute("ZipCode", zipCode[0]).addAttribute("AreaCode", zipCode[1]).addText(citys.getValue());
				TreeMap<String, String> distsBuffer = getAddressInfo("http://www.weather.com.cn/data/city3jdata/station/"+prc.getKey()+""+citys.getKey()+".html");
				for(Entry<String, String> dists : distsBuffer.entrySet())
				{
					String value = http://www.mamicode.com/dists.getValue();>

下载xml数据

小巧抓取(省、市(区号\邮编)、县)数据