首页 > 代码库 > 2003与2007excel数据导入小(5M)中(10M)大(80M以上)

2003与2007excel数据导入小(5M)中(10M)大(80M以上)

//整体excel加载  问题出现在第一步文件加载,经测试5M文件的加载基本已经到我电脑4G内存的极限了,
//当然如果电脑配置更好的话可以优化jvm,应该会再大一些

一、小excel处理

import com.yeepay.prop.common.utils.excel.HxlsReader;
import com.yeepay.prop.common.utils.excel.IRowReader;
import com.yeepay.prop.common.utils.excel.XxlsReader;
import com.yeepay.prop.common.utils.exception.SysOperCustomException;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.*;

public class POIUtil {

    /**
     * 导入Excel文件
     *      内容以List<Map<String K,String V>>的方式存放
     * @param excelFile :   Excel文件对象
     * @param strKeys   :   Map的Key列表,Value为相应的sheet一行中各列的值
     * @return
     */
    public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) {
        String[] strKey = strKeys.split(",");
        List<Map<String,String>> listMap = new ArrayList<Map<String,String>>();
        String fileName = excelFile.getPath();
        int i =   0;
        try {
            Workbook workbook = null;
            String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase();
            try {
                if (fileType.equals("xls")) {
                    <span style="color:#FF0000;">workbook = new XSSFWorkbook(new FileInputStream(excelFile))</span>;//<span style="font-size:18px;color:#FF0000;">如果文件过大 一定会在里卡死,造成内存溢出</span>
                } else if (fileType.equals("xlsx")) {
                   <span style="color:#FF0000;"> workbook = new HSSFWorkbook(new FileInputStream(excelFile));</span>
                }
            } catch (Exception e) {
                throw new SysOperCustomException("导入文件格式错误: "+e.getMessage());
            }
            Sheet sheet = workbook.getSheetAt(0);
            while (true) {
                Row row = sheet.getRow(i);
                if (row == null)
                    break;

                Map<String,String> map = new HashMap<String,String>();
                Boolean flag = false;
                for(int keyIndex =   0; keyIndex < strKey.length; keyIndex++){
                    //System.out.println(keyIndex+" "+row.getCell(keyIndex));
                    Cell cell = row.getCell(keyIndex);
                    //null 不可以 设置格式为cell_type_string
                    if(null != row.getCell(keyIndex))
                    {
                        if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell))
                        {
                            // 数值、日期类型
                            double d = cell.getNumericCellValue();
                            // 日期类型
                            Date date = HSSFDateUtil.getJavaDate(d);
                            cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                            cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
                                    .format(date));
                        }
                        else
                        {
                            cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                        }
                    }
                    String cellValue = http://www.mamicode.com/"";>

调用方法如下

import java.io.File;
import java.util.Date;
import java.util.List;
import java.util.Map;

public class Test {

    @org.junit.Test
    public void testImport() {

        File image = new File("D:\\test.xls");
        String titelStr = "bankCustomerNo,cardNumber,tradeAmount,tradeTime,requestId";//实体属性


        System.out.println(new Date());
        List<Map<String,String>> list =  POIUtil.importExcelToMap(image, titelStr);
        //模板的判定
        //在这里处理业务逻辑与数据处理  实体的创建于保存
        for(Map<String, String> map : list) {
            User user = new User();
            user.setName(map.get("name"));
            .
            .
            .
        }
    }
}


在我以前所有的excel数据导入都是通过poi的简单应用,直接加载整个excel进行数据返回统计,第一步直接造成了不可能对于大文件、大数据的的处理。因此通过参考http://blog.sina.com.cn/s/blog_628d4dd101017quk.html并在以上的基础上编写了如下工具类,之所以工具类不支持大数据量的处理是因为其中方法optRows编写代码问题,该方法为每一行数据的存储都会调用该方法,当数据量达到10万条数据的时候就会调用十万次,list中存在十万个map数据,map中又存在十万个key、10万个value,并且这些对象会长期持有不会释放掉,因此会造成gc回收出现文件,最后超出内存 异常爆出out of memory

之所以保留了此方法,是因为数据一起返回操作会更简便,业务书写会更方便(相对于大excel处理)

二、中等excel处理

import com.yeepay.prop.common.utils.excel.HxlsReader;
import com.yeepay.prop.common.utils.excel.IRowReader;
import com.yeepay.prop.common.utils.excel.XxlsReader;
import com.yeepay.prop.common.utils.exception.SysOperCustomException;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.*;


public class POIUtil {

    /**
     * 导入Excel文件
     *      内容以List<Map<String K,String V>>的方式存放
     * @param excelFile :   Excel文件对象
     * @param strKeys   :   Map的Key列表,Value为相应的sheet一行中各列的值
     * @return
     */
    public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) {
        String[] strKey = strKeys.split(",");
        List<Map<String,String>> listMap = new ArrayList<Map<String,String>>();
        String fileName = excelFile.getPath();
        int i =   0;
        try {
            Workbook workbook = null;
            String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase();
            try {
                if (fileType.equals("xls")) {
                    workbook = new XSSFWorkbook(new FileInputStream(excelFile));
                } else if (fileType.equals("xlsx")) {
                    workbook = new HSSFWorkbook(new FileInputStream(excelFile));
                }
            } catch (Exception e) {
                throw new SysOperCustomException("导入文件格式错误: "+e.getMessage());
            }
            Sheet sheet = workbook.getSheetAt(0);
            while (true) {
                Row row = sheet.getRow(i);
                if (row == null)
                    break;

                Map<String,String> map = new HashMap<String,String>();
                Boolean flag = false;
                for(int keyIndex =   0; keyIndex < strKey.length; keyIndex++){
                    //System.out.println(keyIndex+" "+row.getCell(keyIndex));
                    Cell cell = row.getCell(keyIndex);
                    //null 不可以 设置格式为cell_type_string
                    if(null != row.getCell(keyIndex))
                    {
                        if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell))
                        {
                            // 数值、日期类型
                            double d = cell.getNumericCellValue();
                            // 日期类型
                            Date date = HSSFDateUtil.getJavaDate(d);
                            cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                            cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
                                    .format(date));
                        }
                        else
                        {
                            cell.setCellType(HSSFCell.CELL_TYPE_STRING);
                        }
                    }
                    String cellValue = http://www.mamicode.com/"";>

1.excel2003工具类

package com.yeepay.prop.common.utils;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * excel 2003 文件处理
 */
public class HxlsUtil implements HSSFListener {
    private int minColumns;
    private POIFSFileSystem fs;
    private PrintStream output;

    private int lastRowNumber;
    private int lastColumnNumber;

    /** Should we output the formula, or the value it has? */
    private boolean outputFormulaValues = true;

    /** For parsing Formulas */
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;

    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;

    /** So we known which sheet we're on */
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;

    private ArrayList boundSheetRecords = new ArrayList();

    // For handling formulas with string results
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;

    private int curRow;
    private List<String> rowlist;
    @SuppressWarnings( "unused")
    private String sheetName;

    private String title;

    private List<Map<String, String>> mapList;

    private HxlsUtil(POIFSFileSystem fs, String title)
            throws SQLException {
        this.fs = fs;
        this.output = System.out;
        this.minColumns = -1;
        this.curRow = 0;
        this.rowlist = new ArrayList<String>();
        this.title = title;
        this.mapList = new ArrayList<Map<String, String>>();
    }

    private HxlsUtil(String filename, String title) throws IOException, SQLException {
        this(new POIFSFileSystem(new FileInputStream(filename)), title);
    }

    <strong>public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {

        <span style="color:#FF0000;">String[] titleStr = title.split(",");</span>
        <span style="color:#FF0000;">int col</span> = rowlist.size();
        <span style="color:#FF0000;">Map<String, String> map = new HashMap<String, String>();</span>
        for(int i = 0; i < titleStr.length; i++) {
            if(col >= titleStr.length)
                map.put(titleStr[i], rowlist.get(i).trim());
            else
                map.put(titleStr[i], "");
        }
        return map.size() > 0 ? map : null;
    }</strong>

    /**
     * 遍历 excel 文件
     */
    public void process() throws IOException {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
                this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();

        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(
                    formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }

        factory.processWorkbookEvents(request, fs);
    }

    /**
     * HSSFListener 监听方法,处理 Record
     */
    @SuppressWarnings("unchecked")
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
        String thisStr = null;
        String value = http://www.mamicode.com/null;>

2.2007excel工具类

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.File;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.*;

/**
 * Excel 2007采用SAX解析文件,注意JDK版本1.5以上
 */
public class XxlsUtil extends DefaultHandler {
	private SharedStringsTable sst;
	private String lastContents;
	private boolean nextIsString;

	private int sheetIndex = -1;
	private List<String> rowlist = new ArrayList<String>();
	private int curRow = 0;
	private int curCol = 0;

    private String title;

    private List<Map<String, String>> mapList;

    private XxlsUtil(String fileName, String title) throws Exception {
        this.title = title;
        this.mapList = new ArrayList<Map<String, String>>();
        processOneSheet(fileName, 1);
    }


    public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {

        String[] titleStr = title.split(",");
        int col = rowlist.size();
        Map<String, String> map = new HashMap<String, String>();
        for(int i = 0; i < titleStr.length; i++) {
            if(col >= titleStr.length)
                map.put(titleStr[i], rowlist.get(i).trim());
            else
                map.put(titleStr[i], "");
//            System.out.print(rowlist.get(i) + "   ");
        }
        return map.size() > 0 ? map : null;
    }

	//只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
	public void processOneSheet(String filename,int sheetId) throws Exception {
		OPCPackage pkg = OPCPackage.open(filename);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();
		
		XMLReader parser = fetchSheetParser(sst);

		// rId2 found by processing the Workbook
		// 根据 rId# 或 rSheet# 查找sheet
		InputStream sheet2 = r.getSheet("rId"+sheetId);
		sheetIndex++;
		InputSource sheetSource = new InputSource(sheet2);
		parser.parse(sheetSource);
		sheet2.close();
	}

	/**
	 * 遍历 excel 文件
	 */
	public void process(String filename) throws Exception {
		OPCPackage pkg = OPCPackage.open(filename);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		Iterator<InputStream> sheets = r.getSheetsData();
		while (sheets.hasNext()) {
			curRow = 0;
			sheetIndex++;
			InputStream sheet = sheets.next();
			InputSource sheetSource = new InputSource(sheet);
			parser.parse(sheetSource);
			sheet.close();
		}
	}

	public XMLReader fetchSheetParser(SharedStringsTable sst)
			throws SAXException {
		XMLReader parser = XMLReaderFactory.createXMLReader();
//				.createXMLReader("org.apache.xerces.parsers.SAXParser");
		this.sst = sst;
		parser.setContentHandler(this);
		return parser;
	}

	public void startElement(String uri, String localName, String name,
			Attributes attributes) throws SAXException {
		// c => 单元格
		if (name.equals("c")) {
			// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
			String cellType = attributes.getValue("t");
			if (cellType != null && cellType.equals("s")) {
				nextIsString = true;
			} else {
				nextIsString = false;
			}
		}
		// 置空
		lastContents = "";
	}

	public void endElement(String uri, String localName, String name)
			throws SAXException {
		// 根据SST的索引值的到单元格的真正要存储的字符串
		// 这时characters()方法可能会被调用多次
		if (nextIsString) {
			try {
				int idx = Integer.parseInt(lastContents);
				lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
						.toString();
			} catch (Exception e) {

			}
		}

		// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
		// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
		if (name.equals("v")) {
			String value = http://www.mamicode.com/lastContents.trim();>

调用方法如下


import com.yeepay.prop.common.utils.POIUtil;

import java.io.File;
import java.util.Date;
import java.util.List;
import java.util.Map;

/**
 * Description
 * PackageName: PACKAGE_NAME
 *
 * @author hua.jin
 * @since 14-12-22 上午9:26
 */
public class Test {

    @org.junit.Test
    public void testImport() {

        File image = new File("D:\\test.xls");
        String titelStr = "name,age,gender";


        System.out.println(new Date());
        List<Map<String,String>> list =  POIUtil.<strong>importBigExcelToMap</strong>(image, titelStr);       <pre name="code" class="java">        //模板的判定
        //在这里处理业务逻辑与数据处理  实体的创建于保存
        for(Map<String, String> map : list) {
            User user = new User();
            user.setName(map.get("name"));
            .
            .
            .
        }
}}



三、大excel文件的处理

这个就没什么好说的了,该方法 主要参考了http://blog.csdn.net/goodkuang2012/article/details/7350985,将行数据进行了接口处理,因为一行行数据进行处理也就缺失了整体文件的把控,在异常处理这块 该好好考虑一下了,在此基础上的改动没有太大,也就不再叙述了,有得有失吧

经测试大文件处理80M没有问题



















2003与2007excel数据导入小(5M)中(10M)大(80M以上)