首页 > 代码库 > 从高德地图大批量扒取数据,并写入excel

从高德地图大批量扒取数据,并写入excel

首先,源代码下载地址:http://down.51cto.com/data/2270088
然后,如果有问题,或者是积分不够可以发送邮件到我的邮箱whsgzcy@foxmail.com,我可以把源程序分享给大家,如果对用google抓取接口不熟悉的话,也可以直接找我,我的手机号码是:15656098064,很高兴能和各位同行分享,如果是打电话要我上厕所,我可以要骂人的呦~。 
最后,直接上代码。


package com.iwant.download2geodata;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import javax.naming.InitialContext;

import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;

import com.iwant.download2geodata.data.ShopInfo;
import com.iwant.download2geodata.data.ShopList;
import com.iwant.download2geodata.data.TemplateData;

import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;

/**
 * @Description: 获取高德所有数据
 * @author: whsgzcy
 * @date: 2016-12-17 下午1:04:31 首先是从高德copy一个链接,只要修改pagernum参数即可拼接成新的链接
 *        抓取方式较原始,但可维护性高
 */
public class GeoGetDataThread extends Thread {

    public int pagernum = 1;// 跳转到下一页参数
    public String murl = "http://ditu.amap.com/service/poiInfo?query_type=TQUERY&pagesize=20&pagenum=";    
    public String nurl = "&qii=true&cluster_state=5&need_utd=true&utd_sceneid=1000&"
            + "div=PC1000&addr_poi_merge=true&is_classify=true&"
            + "city=321200&geoobj=119.848676%7C32.40107%7C119.93279%7C32.527222&keywords=%E7%94%B5%E5%8A%A8%E8%BD%A6";
    
    private HSSFWorkbook workbook = null; 
    private ShopList shopList = new ShopList();
    private List<ShopInfo> shopInfoList = new ArrayList<ShopInfo>();
    private List<TemplateData> templateDataList = new ArrayList<TemplateData>();
    
    /**
     * @Description: 根据pagernum跳转下一页
     * @author: whsgzcy
     * @date: 2016-12-17 下午1:17:08
     * @param pagernum
     */

    @Override
    public void run() {
        super.run();

//        // /先用本次文件进行测试
//        String encoding = "UTF-8";
//        StringBuilder builder = null;
//        try {
//            File file = new File("C:\\Users\\suzhe\\Desktop\\t.txt");
//            InputStreamReader read = new InputStreamReader(new FileInputStream(
//                    file), encoding);// 考虑到编码格式
//            BufferedReader bufferedReader = new BufferedReader(read);
//            builder = new StringBuilder();
//            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader
//                    .readLine()) {
//                builder.append(s);
//            }
//        } catch (Exception e) {
//            e.printStackTrace();
//        }
//        if (null == builder) return;
//        jsonObject = JSONObject.fromObject(builder.toString());
        // 请求数据
        // 根据现有的数据量没有到list极限
        // 使用list存储数据,并在每次请求数据时休眠5秒,放缓线程以及防止扒取数据IP被封
        
        try {
            for(;;pagernum++){
                // 线程休眠
//                try {Thread.sleep(5000);} catch (InterruptedException e1) {e1.printStackTrace();}
                String url = murl+pagernum+nurl;
                System.out.println(url);
                JSONObject jsonObject = new JSONObject();
                jsonObject = HttpRequestUtil.getJsonObject(url);
                // 先打印count 判断是否有数据
                int count = jsonObject.getInt("count");
                if(0 == count) break;
                System.out.println("count = " + count);
                if (0 != count) {
                    JSONArray dataArray = jsonObject.getJSONArray("data");
                    JSONArray listArray = dataArray.getJSONObject(0).getJSONArray("list");
                    JSONObject line = null;
                    for (int i = 0; i < listArray.size(); i++) {
                        ShopInfo shopInfo = new ShopInfo();
                        TemplateData template = new TemplateData();
                        line = listArray.getJSONObject(i);
                        String name = line.getString("name");
                        shopInfo.setName(name);
                        String tel = line.getString("tel");
                        shopInfo.setTel(tel);
                        String address = line.getString("address");
                        shopInfo.setAddress(address);
                        String longitude = line.getString("longitude");
                        shopInfo.setLongitude(longitude);
                        String latitude = line.getString("latitude");
                        shopInfo.setLatitude(latitude);
                        JSONObject templateData = line.getJSONObject("templateData");
                        if (templateData.has("pic_info")) {
                            String pic_info = templateData.getString("pic_info");
                            template.setPic_info(pic_info);
                            templateDataList.add(template);
                            shopInfo.setTemplateData(templateDataList);
                        }else{
                            template.setPic_info("");
                            templateDataList.add(template);
                            shopInfo.setTemplateData(templateDataList);
                        }
                        shopInfoList.add(shopInfo);
                        System.out.println("数据读写中");
                    }
                    // 至此 第一页请求数据完成
                    shopList.setmShopInfoList(shopInfoList);
            }
                
                // 将数据写进Excel
                String title[] = {"店铺名称","店主","店铺简介","店铺电话","地址","经度","纬度","101充电桩","10有充电桩","照片","是否提供上门服务","图片链接"}; 
                createExcel("C:/Users/suzhe/Desktop/test2.xls","sheet1",title);
                    try {  
                        workbook = new HSSFWorkbook(new FileInputStream("C:/Users/suzhe/Desktop/test2.xls"));
                        //流  
                        FileOutputStream out = null;  
                        HSSFSheet sheet = workbook.getSheet("sheet1");
                        // 获取表格的总行数  
                        int rowCount = sheet.getLastRowNum() + 1; // 需要加一  
                        // 获取表头的列数  
                        int columnCount = sheet.getRow(0).getLastCellNum();  
                        Row row = sheet.createRow(rowCount);     //最新要添加的一行  
                        HSSFRow titleRow = sheet.getRow(0);  
                        
                        if(titleRow!=null){  
                                for(int i = 0; i < shopList.getmShopInfoList().size(); i++){
                                    
                                    row = sheet.createRow(i+1);     //最新要添加的一行  
                                    
                                    for(int k = 0; k < 12; k++){
                                    
                                        String name = shopInfoList.get(i).getName();
                                        Cell cellName = row.createCell(0);
                                        cellName.setCellValue(name);
                                        
                                        Cell cellMaster = row.createCell(1);                                
                                        cellMaster.setCellValue("无");
                                        Cell cellSay = row.createCell(2);
                                        cellSay.setCellValue("无");
                                        
                                        String tel = shopInfoList.get(i).getTel();
                                        Cell cellTel = row.createCell(3);
                                        cellTel.setCellValue(tel);
                                        
                                        String address = shopInfoList.get(i).getAddress();
                                        Cell cellAddress = row.createCell(4);
                                        cellAddress.setCellValue(address);
                                        
                                        String longitude = shopInfoList.get(i).getLongitude();
                                        Cell cellLongitude = row.createCell(5);
                                        cellLongitude.setCellValue(longitude);
                                        
                                        String latitude = shopInfoList.get(i).getLatitude();
                                        Cell cellLatitude = row.createCell(6);
                                        cellLatitude.setCellValue(latitude);
                                        
                                        Cell cellIsCharge = row.createCell(7);
                                        cellIsCharge.setCellValue("0");                                    
                                        Cell cellHaveCharge = row.createCell(8);
                                        cellHaveCharge.setCellValue("0");
                                        
                                        String pic_info = shopInfoList.get(i).getTemplateData().get(i).getPic_info();
                                        if(pic_info.equals("")){
                                            Cell cellPicUrl= row.createCell(11);
                                            cellPicUrl.setCellValue("");
                                            Cell cellPic = row.createCell(9);
                                            cellPic.setCellValue("");
                                        }else{
                                            Cell cellPicUrl= row.createCell(11);
                                            cellPicUrl.setCellValue(pic_info);
                                            Cell cellPic = row.createCell(9);
                                            cellPic.setCellValue("taizhou/"+(i+1)+".jpg");
                                        }
                                        
                                        Cell cellHome = row.createCell(10);
                                        cellHome.setCellValue("暂不提供上门服务");    
                                    }
                                }
                        }  
                        out = new FileOutputStream("C:/Users/suzhe/Desktop/test2.xls");  
                        workbook.write(out);  
                    }  catch (Exception e) {  
                        e.printStackTrace();  
                    }  
            }
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }
    
    public void writeToExcel(String fileDir,String sheetName){  
        //创建workbook  
        File file = new File(fileDir);  
        try {  
            workbook = new HSSFWorkbook(new FileInputStream(file));  
        } catch (FileNotFoundException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
        //流  
        FileOutputStream out = null;  
        HSSFSheet sheet = workbook.getSheet(sheetName);  
        // 获取表格的总行数  
        int rowCount = sheet.getLastRowNum() + 1; // 需要加一  
        // 获取表头的列数  
        int columnCount = sheet.getRow(0).getLastCellNum();  
        try {  
            Row row = sheet.createRow(rowCount);     //最新要添加的一行  
            //通过反射获得object的字段,对应表头插入  
            // 获取该对象的class对象  
//            Class class_ = object.getClass();  
            // 获得表头行对象  
            HSSFRow titleRow = sheet.getRow(0);  
            if(titleRow!=null){  
                for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {  //遍历表头  
                    String title = titleRow.getCell(columnIndex).toString().trim().toString().trim();  
                    Cell cell1 = row.createCell(0);
                    cell1.setCellValue("111");
                    Cell cell2 = row.createCell(1);
                    cell2.setCellValue("222");
                    Cell cell3 = row.createCell(2);
                    cell3.setCellValue("333");
                }  
            }  
            out = new FileOutputStream(fileDir);  
            workbook.write(out);  
        } catch (Exception e) {  
            e.printStackTrace();  
        } finally {    
            try {    
                out.close();    
            } catch (IOException e) {    
                e.printStackTrace();  
            }    
        }    
    } 
      /** 
     * 创建新excel. 
     * @param fileDir  excel的路径 
     * @param sheetName 要创建的表格索引 
     * @param titleRow excel的第一行即表格头 
     */  
    public void createExcel(String fileDir,String sheetName,String titleRow[]){  
        //创建workbook  
        workbook = new HSSFWorkbook();  
        //添加Worksheet(不添加sheet时生成的xls文件打开时会报错)  
        Sheet sheet1 = workbook.createSheet(sheetName);    
        //新建文件  
        FileOutputStream out = null;  
        try {  
            //添加表头  
            Row row = workbook.getSheet(sheetName).createRow(0);    //创建第一行    
            for(int i = 0;i < titleRow.length;i++){  
                Cell cell = row.createCell(i);  
                cell.setCellValue(titleRow[i]);  
            }  
            out = new FileOutputStream(fileDir);  
            workbook.write(out);  
        } catch (Exception e) {  
            e.printStackTrace();  
        } finally {    
            try {    
                out.close();    
            } catch (IOException e) {    
                e.printStackTrace();  
            }    
        }    
    }  

    /**
     * 
     * @Description:读取本地文件---/Users/whsgzcy/Desktop/t.txt
     * @author: whsgzcy
     * @date: 2016-12-17 下午5:35:21
     * @param filePath
     *            void
     * @throws
     */
    public static void readTxtFile(String filePath) {
        try {
            String encoding = "UTF-8";
            File file = new File(filePath);
            if (file.isFile() && file.exists()) { // 判断文件是否存在
                InputStreamReader read = new InputStreamReader(
                        new FileInputStream(file), encoding);// 考虑到编码格式
                BufferedReader bufferedReader = new BufferedReader(read);
                String lineTxt = null;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    System.out.println(lineTxt);
                }
                read.close();
            } else {
                System.out.println("找不到指定的文件");
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }

    }

    public static void main(String[] args) {
        GeoGetDataThread dg = new GeoGetDataThread();
        dg.start();
    }

}
package com.iwant.download2geodata;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;

import net.sf.json.JSONObject;



/**
 * @Description:
 * @author: whsgzcy
 * @date: 2016-12-17 下午1:30:05
 * 
 */
public class HttpRequestUtil {
    
    /**
    * @Description:纯get请求
    * @author: whsgzcy
    * @date: 2016-12-17 下午1:51:14  
    * @param url
    * @return
    * JSONObject
    * @throws
     */
    public static JSONObject getJsonObject(String url) {
        JSONObject jsonObject = null;
        try {
            HttpClient httpClient = new DefaultHttpClient();
            HttpGet httpGet = new HttpGet(url);
            HttpParams httpParams = httpClient.getParams();
            HttpConnectionParams.setConnectionTimeout(httpParams, 5000);
            HttpResponse response = httpClient.execute(httpGet);
            StringBuilder builder = new StringBuilder();
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(),
                            "utf-8"));
            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader
                    .readLine()) {
                builder.append(s);
            }
            jsonObject = JSONObject.fromObject(builder.toString());
        } catch (Exception e) {
            e.printStackTrace();
            jsonObject = null;
        }
        return jsonObject;
    }
}
package com.iwant.download2geodata.data;

import java.io.Serializable;
import java.util.List;

/**
 * @Description: 
 * @author: whsgzcy
 * @date: 2016-12-17 下午2:07:25  
 *    
 */
public class ShopInfo implements Serializable{
    
    private String name;
    private String tel;
    private String address;
    private String longitude;//经度
    private String latitude;//纬度
    private List<TemplateData> templateData;// 图片
    
    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public String getTel() {
        return tel;
    }
    public void setTel(String tel) {
        this.tel = tel;
    }
    public String getAddress() {
        return address;
    }
    public void setAddress(String address) {
        this.address = address;
    }
    public String getLongitude() {
        return longitude;
    }
    public void setLongitude(String longitude) {
        this.longitude = longitude;
    }
    public String getLatitude() {
        return latitude;
    }
    public void setLatitude(String latitude) {
        this.latitude = latitude;
    }
    public List<TemplateData> getTemplateData() {
        return templateData;
    }
    public void setTemplateData(List<TemplateData> templateData) {
        this.templateData = templateData;
    }
}
package com.iwant.download2geodata.data;

import java.io.Serializable;
import java.util.List;

/**
 * @Description: 
 * @author: whsgzcy
 * @date: 2016-12-17 下午2:40:12  
 *    
 */
public class ShopList implements Serializable{
    
    private List<ShopInfo> mShopInfoList;

    public List<ShopInfo> getmShopInfoList() {
        return mShopInfoList;
    }

    public void setmShopInfoList(List<ShopInfo> mShopInfoList) {
        this.mShopInfoList = mShopInfoList;
    }

}
package com.iwant.download2geodata.data;

import java.io.Serializable;

/**
 * Holder for the picture information attached to a shop.
 *
 * @author whsgzcy
 * @date 2016-12-17
 */
public class TemplateData implements Serializable{

    /** Picture information text; {@code null} until assigned. */
    private String pic_info;

    /**
     * Stores the picture information.
     *
     * @param pic_info the value to keep
     */
    public void setPic_info(final String pic_info) {
        this.pic_info = pic_info;
    }

    /**
     * @return the stored picture information, or {@code null} if none was assigned
     */
    public String getPic_info() {
        return this.pic_info;
    }
}

代码很简单,懂 Java 的应该都能看懂。在此提一个设计思路:我在代码中注释掉了一段线程休眠(Thread.sleep)的逻辑,它的作用是放慢请求频率,防止我的 IP 被屏蔽。如果加入 sleep,抓取线程会在每次请求前暂停 5 秒,这段时间内 CPU 可以去执行其他线程(例如主线程)上的逻辑。我是一次性抓取通过的,就懒得加了,有兴趣的同学可以将这块加上,thanks a lot。

本文出自 “7851921” 博客,请务必保留此出处http://7861921.blog.51cto.com/7851921/1883764

从高德地图大批量扒取数据,并写入excel