首页 > 代码库 > 自己写的一个读取execl的帮助类

自己写的一个读取execl的帮助类

目标:读取execl的第一个sheet,并传入不需要读取的表头的行数,返回该execl里所有数据的list

解析共有2种:1、DOM      2、SAX

  1 import java.io.File;  2 import java.io.IOException;  3 import java.io.InputStream;  4 import java.text.DecimalFormat;  5 import java.util.ArrayList;  6 import java.util.Iterator;  7 import java.util.List;  8 import java.util.regex.Matcher;  9 import java.util.regex.Pattern; 10  11 import org.apache.commons.logging.Log; 12 import org.apache.commons.logging.LogFactory; 13 import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 14 import org.apache.poi.openxml4j.exceptions.OpenXML4JException; 15 import org.apache.poi.openxml4j.opc.OPCPackage; 16 import org.apache.poi.ss.usermodel.Cell; 17 import org.apache.poi.ss.usermodel.Row; 18 import org.apache.poi.ss.usermodel.Sheet; 19 import org.apache.poi.ss.usermodel.Workbook; 20 import org.apache.poi.ss.usermodel.WorkbookFactory; 21 import org.apache.poi.xssf.eventusermodel.XSSFReader; 22 import org.apache.poi.xssf.model.SharedStringsTable; 23 import org.apache.poi.xssf.usermodel.XSSFRichTextString; 24 import org.xml.sax.Attributes; 25 import org.xml.sax.ContentHandler; 26 import org.xml.sax.InputSource; 27 import org.xml.sax.SAXException; 28 import org.xml.sax.XMLReader; 29 import org.xml.sax.helpers.DefaultHandler; 30 import org.xml.sax.helpers.XMLReaderFactory; 31  32 /** 33  * 基于XSSF and SAX (Event API) 34  * 读取excel的第一个Sheet的内容 35  * @author yzl 36  * 37  */ 38 public class ReadExcelUtils { 39     private int headCount = 0; 40     private List<List<String>> list = new ArrayList<List<String>>(); 41     private static final Log log = LogFactory.getLog(ReadExcelUtils.class); 42      43     /** 44      * 通过文件流构建DOM进行解析 45      * @param ins 46      * @param headRowCount 47      * @return 48      * @throws InvalidFormatException 49      * @throws IOException 50      */ 51     public  List<List<String>> processDOMReadSheet(InputStream ins,int headRowCount) throws InvalidFormatException, IOException { 52         Workbook workbook = WorkbookFactory.create(ins); 53         return this.processDOMRead(workbook, headCount); 54     } 55      56     /** 57      * 采用DOM的形式进行解析 58      * @param filename 59      * @param headRowCount 60      * @return 61      * @throws IOException  62      * @throws InvalidFormatException  63      * @throws Exception 64      */ 65     public  List<List<String>> processDOMReadSheet(String filename,int headRowCount) throws InvalidFormatException, IOException { 66         Workbook workbook = WorkbookFactory.create(new File(filename)); 67         return this.processDOMRead(workbook, headCount); 68     } 69  70     /** 71      * 采用SAX进行解析 72      * @param filename 73      * @param headRowCount 74      * @return 75      * @throws OpenXML4JException  76      * @throws IOException  77      * @throws SAXException  78      * @throws Exception 79      */ 80     public List<List<String>> processSAXReadSheet(String filename,int headRowCount) throws IOException, OpenXML4JException, SAXException   { 81         headCount = headRowCount; 82          83         OPCPackage pkg = OPCPackage.open(filename); 84         XSSFReader r = new XSSFReader( pkg ); 85         SharedStringsTable sst = r.getSharedStringsTable(); 86         XMLReader parser = fetchSheetParser(sst); 87  88         Iterator<InputStream> sheets = r.getSheetsData(); 89         InputStream sheet = sheets.next(); 90         InputSource sheetSource = new InputSource(sheet); 91         parser.parse(sheetSource); 92         sheet.close(); 93          94         log.debug("时间:"+DateUtils.getNowTime()+",共读取了execl的记录数为 :"+list.size()); 95          96         return list; 97     } 98  99     private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {100         XMLReader parser =101             XMLReaderFactory.createXMLReader(102                     "org.apache.xerces.parsers.SAXParser"103             );104         ContentHandler handler = new SheetHandler(sst);105         parser.setContentHandler(handler);106         return parser;107     }108 109     /** 110      * SAX 解析excel111      */112     private class SheetHandler extends DefaultHandler {113         private SharedStringsTable sst;114         private String lastContents;115         private boolean nextIsString;116         private boolean isNullCell;117         //读取行的索引118         private int rowIndex = 0;119         //是否重新开始了一行120         private boolean curRow = false;121         private List<String> rowContent;122         123         private SheetHandler(SharedStringsTable sst) {124             this.sst = sst;125         }126         127         public void startElement(String uri, String localName, String name,128                 Attributes attributes) throws SAXException {129             //节点的类型130             //System.out.println("---------begin:" + name);131             if(name.equals("row")){132                 rowIndex++;133             }134             //表头的行直接跳过135             if(rowIndex > headCount){136                 curRow = true;137                 // c => cell138                 if(name.equals("c")) {139                     String cellType = attributes.getValue("t");140                     if(null == cellType){141                         isNullCell = true;142                     }else{143                         if(cellType.equals("s")) {144                             nextIsString = true;145                         } else {146                             nextIsString = false;147                         }148                         isNullCell = false;149                     }150                 }151                 // Clear contents cache152                 lastContents = "";153             }154         }155         156         public void endElement(String uri, String localName, String name)157                 throws SAXException {158             //System.out.println("-------end:"+name);159             if(rowIndex > headCount){160                 if(nextIsString) {161                     int idx = Integer.parseInt(lastContents);162                     lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();163                     nextIsString = false;164                 }165                 if(name.equals("v")) {166                     //System.out.println(lastContents);167                     if(curRow){168                         //是新行则new一行的对象来保存一行的值169                         if(null==rowContent){170                             rowContent = new ArrayList<String>();171                         }172                         rowContent.add(lastContents);173                     }174                 }else if(name.equals("c") && isNullCell){175                     if(curRow){176                         //是新行则new一行的对象来保存一行的值177                         if(null==rowContent){178                             rowContent = new ArrayList<String>();179                         }180                         rowContent.add(null);181                     }182                 }183                 184                 isNullCell = false;185 186                 if("row".equals(name)){187                     list.add(rowContent);188                     curRow = false;189                     rowContent = null;190                 }191             }192             193         }194 195         public void characters(char[] ch, int start, int length)196                 throws SAXException {197             lastContents += new String(ch, start, length);198         }199     }200     201     /**202      * DOM的形式解析execl203      * @param workbook204      * @param headRowCount205      * @return206      * @throws InvalidFormatException207      * @throws IOException208      */209     private List<List<String>> processDOMRead(Workbook workbook,int headRowCount) throws InvalidFormatException, IOException {210         headCount = headRowCount;211         212         Sheet sheet = workbook.getSheetAt(0);213         //行数214         int endRowIndex = sheet.getLastRowNum();215         216         Row row = null;217         List<String> rowList = null;218         219         for(int i=headCount; i<=endRowIndex; i++){220             rowList = new ArrayList<String>();221             row = sheet.getRow(i);222             for(int j=0; j<row.getLastCellNum();j++){223                 if(null==row.getCell(j)){224                     rowList.add(null);225                     continue;226                 }227                 int dataType = row.getCell(j).getCellType();228                 if(dataType == Cell.CELL_TYPE_NUMERIC){229                     DecimalFormat df = new DecimalFormat("0.####################");  230                     rowList.add(df.format(row.getCell(j).getNumericCellValue()));231                 }else if(dataType == Cell.CELL_TYPE_BLANK){232                     rowList.add(null);233                 }else if(dataType == Cell.CELL_TYPE_ERROR){234                     rowList.add(null);235                 }else{236                     //这里的去空格根据自己的情况判断237                     String valString = row.getCell(j).getStringCellValue();238                     Pattern p = Pattern.compile("\\s*|\t|\r|\n");239                     Matcher m = p.matcher(valString);240                     valString = m.replaceAll("");241                     //去掉狗日的不知道是啥东西的空格242                     if(valString.indexOf(" ")!=-1){243                         valString = valString.substring(0, valString.indexOf(" "));244                     }245                     246                     rowList.add(valString);247                 }248             }249             250             list.add(rowList);251         }252         log.debug("时间:"+DateUtils.getNowTime()+",共读取了execl的记录数为 :"+list.size());253         254         return list;255     }256     257     @SuppressWarnings("unused")258     public static void main(String[] args) throws Exception {259         ReadExcelUtils howto = new ReadExcelUtils();260         String fileName = "f:/test.xlsx";261         List<List<String>> list = howto.processSAXReadSheet(fileName,2);262         263         ReadExcelUtils h = new ReadExcelUtils();264         String fileName1 = "f:/test.xls";265         List<List<String>> result = h.processDOMReadSheet(fileName1,2);266     }267 }

 

自己写的一个读取execl的帮助类