您现在的位置是:首页 > 文章详情

POI事件模式指北(三)- 读取Excel实战

日期:2019-02-23点击:347

POI事件模式指北(三)- 读取Excel实战

1、简介

这篇文章将介绍一个完整的利用POI事件模式读取Excel的实例,目的是贴近实际使用中的情况,并且考虑了细节的问题而不仅仅满足于实现简单功能。

1.1、业务背景

系统中使用了POI的用户模式读取Excel文件,某一次用户上传了一个超过1M的文件,导致系统内存很快耗光,最后出现了OOM异常,最后分析决定使用事件模式进行改造以提高性能。

1.2、版本和依赖

下面列出来的是实例代码实际使用的情况,您可以选择其他版本,绝大多数情况下可以正常运行。

  • JDK6
  • poi-3.9.jar
  • poi-ooxml-3.9.jar
  • dom4j-1.6.1.jar
  • xmlbeans-2.3.0.jar

以上是实现读取Excel所需的最小jar包;如果你还需要或者想要学习POI的其他部分,请自行导入其他包;强烈建议导入POI全部jar包。

2、示例代码

下面的导入针对比较典型的导入场景,主要有以下假定条件:

  • 导入的文件列数是确定的
  • 需要导入的内容在第一个sheet中
  • 文件中所有数据都是有意义的且不包含无效数据

上面的假设代表一种典型场景,如果实际场景有所不同,只需要对下面代码进行简单改造即可。

import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; import org.apache.poi.hssf.model.HSSFFormulaParser; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; import org.apache.poi.hssf.record.BoolErrRecord; import org.apache.poi.hssf.record.BoundSheetRecord; import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.LabelRecord; import org.apache.poi.hssf.record.LabelSSTRecord; import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; public class ExcelEventParser { enum xssfDataType { BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, } class XLSReader implements HSSFListener { //POIFS文件流 private POIFSFileSystem fs; //当前行数 private int lastRowNumber; //当前列数 private int lastColumnNumber; //是否计算公式的值 private boolean outputFormulaValues = true; private SheetRecordCollectingListener workbookBuildingListener; private HSSFWorkbook stubWorkbook; //共享字符集 private SSTRecord sstRecord; private FormatTrackingHSSFListener formatListener; private int sheetIndex = -1; private List<Record> boundSheetRecords = new ArrayList<Record>(); //公式单元格结果所在行 private int nextRow; //公式单元格结果所在列 private int nextColumn; //公式单元格是否存在计算值 private boolean outputNextStringRecord; //当前行数 private int curRow = 0; //当前最大列数 private int maxRef = 0; //空单元格使用的填充字符 private String NULL = "-"; private List<String> row = new ArrayList<String>(); private List<List<String>> rows = new ArrayList<List<String>>(); /** * 读取Excel2003文件 * @param filePath 文件路径 * @return * @throws IOException */ public List<List<String>> process(String filePath) throws IOException { //使用POIFS读取文件流 this.fs = new POIFSFileSystem(new FileInputStream(filePath)); MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); formatListener = new FormatTrackingHSSFListener(listener); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); if (outputFormulaValues) { //添加全部监听器 request.addListenerForAllRecords(formatListener); } else { workbookBuildingListener = new SheetRecordCollectingListener(formatListener); request.addListenerForAllRecords(workbookBuildingListener); } //处理文件流 factory.processWorkbookEvents(request, fs); return rows; } /** * 依次处理每个监听的record * @param record * @return * @throws IOException */ public void processRecord(Record record) { //只处理第一个sheet,其他全部忽略 if (sheetIndex > 0) { return; } if (maxRef > MAX_COLUMN) { return; } int thisRow = -1; int thisColumn = -1; String thisStr = null; String value = null; //根据record的sid匹配类型分别进行处理 //可以在这里添加业务逻辑 switch (record.getSid()) { // 记录了sheetName case BoundSheetRecord.sid: boundSheetRecords.add(record); break; // Workbook、Sheet的开始 case BOFRecord.sid: BOFRecord br = (BOFRecord) record; if (br.getType() == BOFRecord.TYPE_WORKSHEET) { if (workbookBuildingListener != null && stubWorkbook == null) { stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); } sheetIndex++; } break; //共享字符集 case SSTRecord.sid: sstRecord = (SSTRecord) record; break; // 存在单元格样式的空单元格 case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; thisRow = brec.getRow(); thisColumn = brec.getColumn(); thisStr = NULL; row.add(thisColumn, thisStr); break; // 布尔或错误单元格 case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; thisRow = berec.getRow(); thisColumn = berec.getColumn(); thisStr = berec.getBooleanValue() + ""; row.add(thisColumn, thisStr); break; // 公式单元格 case FormulaRecord.sid: //调用方法计算单元格的值 FormulaRecord frec = (FormulaRecord) record; thisRow = frec.getRow(); thisColumn = frec.getColumn(); if (outputFormulaValues) { if (Double.isNaN(frec.getValue())) { outputNextStringRecord = true; nextRow = frec.getRow(); nextColumn = frec.getColumn(); } else { thisStr = formatListener.formatNumberDateCell(frec); } } else { thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"'; } row.add(thisColumn, thisStr); break; // 公式的计算结果单元格 case StringRecord.sid: if (outputNextStringRecord) { StringRecord srec = (StringRecord) record; thisStr = srec.getString(); thisRow = nextRow; thisColumn = nextColumn; outputNextStringRecord = false; } break; // 文本单元格 case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; curRow = thisRow = lrec.getRow(); thisColumn = lrec.getColumn(); value = lrec.getValue().trim(); value = value.equals("") ? NULL : value; this.row.add(thisColumn, value); break; // 共用的文本单元格 case LabelSSTRecord.sid: LabelSSTRecord lsrec = (LabelSSTRecord) record; curRow = thisRow = lsrec.getRow(); thisColumn = lsrec.getColumn(); if (sstRecord == null) { row.add(thisColumn, NULL); } else { value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim(); value = value.equals("") ? NULL : value; row.add(thisColumn, value); } break; // 数值单元格:数字单元格和日期单元格 case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; curRow = thisRow = numrec.getRow(); thisColumn = numrec.getColumn(); //处理时间类型的数字 value = formatListener.formatNumberDateCell(numrec).trim(); value = value.equals("") ? NULL : value; row.add(thisColumn, value); break; default: break; } if (thisRow != -1 && thisRow != lastRowNumber) { lastColumnNumber = -1; } if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; curRow = thisRow = mc.getRow(); thisColumn = mc.getColumn(); row.add(thisColumn, NULL); } if (thisRow > -1) lastRowNumber = thisRow; if (thisColumn > -1) lastColumnNumber = thisColumn; //进入行结束处理 if (record instanceof LastCellOfRowDummyRecord) { if (maxRef > row.size()) { int len = maxRef - row.size(); for (int i = 0; i < len; i++) { lastColumnNumber++; row.add(lastColumnNumber, NULL); } } if (curRow == 0) { maxRef = row.size(); } rows.add(row); lastColumnNumber = -1; row = new ArrayList<String>(); } } } class XLSXReader extends DefaultHandler { //样式数据 private StylesTable stylesTable; //共享字符集 private ReadOnlySharedStringsTable sharedStringsTable; //是否存在内联字符 private boolean vIsOpen; //记录数据类型 private xssfDataType nextDataType; private short formatIndex; private String formatString; //格式化类 private final DataFormatter formatter; //当前所在列 private int thisColumn = -1; //当前单元格值 private StringBuffer value; //储存一列的数据 private List<String> record = new ArrayList<String>(); //储存全部数据 private List<List<String>> rows = new ArrayList<List<String>>(); //判断当前行有没有空单元格 private boolean isCellNull = false; //前一个单元格和当前单元格 private String preRef = null, ref = null; //避免同一个单元格重复计数 private String Newcell = null; //计数当前行中的空格 private int rowNull = 0; //当前单元格对应的序号 private int curCol = 0; //当前行对应的序号 private int curRow = 0; //空单元格使用的填充字符 private String NULL = "-"; /** * XLSXReader构造函数 * @param styles 单元格样式数据 * @param strings 共享字符集 */ public XLSXReader(StylesTable styles, ReadOnlySharedStringsTable strings) { this.stylesTable = styles; this.sharedStringsTable = strings; this.value = new StringBuffer(); this.nextDataType = xssfDataType.NUMBER; this.formatter = new DataFormatter(); record = new ArrayList<String>(); rows.clear(); } /** * 每当遇到开始标签时进入 * @param uri 命名空间路径或空 * @param localName 本地名称或空 * @param name 标签名称 * @param attributes 标签内容 * @return * @throws SAXException */ public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if ("inlineStr".equals(name) || "v".equals(name)) { vIsOpen = true; value.setLength(0); //每当遇到<c>表示新的单元格 } else if ("c".equals(name)) { //记录上一个单元格和当前单元格用于填充空格 if (preRef == null) { preRef = attributes.getValue("r"); } else { preRef = ref; } ref = attributes.getValue("r"); String r = ref; int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } //获取当前列数 thisColumn = nameToColumn(r.substring(0, firstDigit)); //判断是否超过最大列数 if (thisColumn > MAX_COLUMN) { return; } //将单元格类型翻译成设定名称 this.nextDataType = xssfDataType.NUMBER; this.formatIndex = -1; this.formatString = null; String cellTpye = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); if ("b".equals(cellTpye)) nextDataType = xssfDataType.BOOL; else if ("e".equals(cellTpye)) nextDataType = xssfDataType.ERROR; else if ("inlineStr".equals(cellTpye)) nextDataType = xssfDataType.INLINESTR; else if ("s".equals(cellTpye)) nextDataType = xssfDataType.SSTINDEX; else if ("str".equals(cellTpye)) nextDataType = xssfDataType.FORMULA; else if (cellStyleStr != null) { int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); this.formatIndex = style.getDataFormat(); this.formatString = style.getDataFormatString(); if (this.formatString == null) { this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex); } } } } /** * 每当遇到结束标签时进入 * @param uri 命名空间路径或空 * @param localName 本地名称或空 * @param name 标签名称 * @return * @throws SAXException */ public void endElement(String uri, String localName, String name) throws SAXException { //当遇到</v>标签时,表面存在一个需要处理的值 if ("v".equals(name)) { if (thisColumn > MAX_COLUMN) { return; } String thisStr = NULL; //根据当前单元格和上一个单元格判断需要补充的空单元格 if (null != ref && !ref.equals(Newcell)) { if (!ref.equals(preRef)) { int num = Integer.parseInt(ref.replaceAll("[A-Z]", "")); int num_1 = Integer.parseInt(preRef.replaceAll("[A-Z]", "")); int len = 0; //取出两个单元格所在行数,如果行数不同则进行换行,从A1开始填充 if (num > num_1) { len = countNullCell(ref, "A1")+1; }else{ len = countNullCell(ref, preRef); } //填充空单元格 for (int i = 0; i < len; i++) { record.add(curCol, thisStr); curCol++; Newcell = ref; rowNull++; } } } //选择合适的方式处理单元格 switch (nextDataType) { case BOOL: char first = value.charAt(0); thisStr = first == '0' ? "否" : "是"; break; case ERROR: thisStr = value.toString(); break; case FORMULA: thisStr = value.toString(); break; case INLINESTR: XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); thisStr = rtsi.toString(); break; case SSTINDEX: String sstIndex = value.toString(); try { //查询字符索引对应的字符值 int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)); thisStr = rtss.toString(); } catch (NumberFormatException ex) { System.out.println("未找到对应的SST索引 '" + sstIndex + "': " + ex.toString()); } break; case NUMBER: String n = value.toString(); //判断该数字是不是时间 if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) { Double d = Double.parseDouble(n); Date date = HSSFDateUtil.getJavaDate(d); thisStr = formatDateToString(date); } else if (this.formatString != null) thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); else thisStr = n; break; //其他特殊类型可以自己扩展 default: thisStr = "(尚未支持的类型: " + nextDataType + ")"; break; } if (thisStr == null || thisStr.equals(NULL)) { isCellNull = true; } //这里可以根据实际业务进行改造,以单元格为单位处理 record.add(curCol, thisStr); curCol++; } else if ("row".equals(name)) { if (isCellNull == false && record.size() != rowNull) { if (MAX_COLUMN >= record.size()) { int len = MAX_COLUMN - record.size(); for (int i = 0; i < len; i++) { record.add(curCol, NULL); curCol++; } } //这里可以根据实际业务进行改造,以行为单位处理 rows.add(curRow, record); isCellNull = false; //这里必须新建对象,不能为了节约清空之前的对象 record = new ArrayList<String>(); curRow++; curCol = 0; rowNull = 0; } } } /** * 计算两个单元格中间的空单元格数量 * <p>Excel2007文件最大行数为1048576,最大列数为16284,列名为XFD</P> * @param ref 当前单元格名称 * @param preRef 之前一个单元格名称 * @return */ private int countNullCell(String ref, String preRef) { String xfd = ref.replaceAll("\\d+", ""); String xfd_1 = preRef.replaceAll("\\d+", ""); xfd = fillChar(xfd, 3, '@', true); xfd_1 = fillChar(xfd_1, 3, '@', true); char[] letter = xfd.toCharArray(); char[] letter_1 = xfd_1.toCharArray(); int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]); return res - 1; } /** * 用特定字符填充字符串到指定长度 * @param str 原字符串 * @param len 目标串的长度 * @param let 填充使用的字符 * @param isPre 是否在前面填充字符 * @return */ private String fillChar(String str, int len, char let, boolean isPre) { int len_1 = str.length(); if (len_1 < len) { if (isPre) { for (int i = 0; i < (len - len_1); i++) { str = let + str; } } else { for (int i = 0; i < (len - len_1); i++) { str = str + let; } } } return str; } /** * 每遇到一个标签调用,用于记录标签类型 * @param ch 文件流字符数组 * @param start 开始位置 * @param length 读取长度 * @return * @throws SAXException */ @Override public void characters(char[] ch, int start, int length) throws SAXException { if (vIsOpen) value.append(ch, start, length); } /** * 根据单元格名称计算列数 * @param name 单元格名称(如:A1) * @return */ private int nameToColumn(String name) { int column = -1; for (int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) * 26 + c - 'A'; } return column; } /** * 将日期类型转变为String格式 * @param date 日期 * @return */ private String formatDateToString(Date date) { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); return sdf.format(date); } } //事先定义的最大列数 private int MAX_COLUMN = 7; private static String XLS = ".xls"; private static String XLSX = ".xlsx"; public ExcelEventParser() { } /** * 构造XLSXReader读取Excel内容 * @param styles 样式信息 * @param strings 共享字符集 * @param sheetInputStream sheet输入流 * @return * @throws IOException * @throws ParserConfigurationException * @throws SAXException */ private List<List<String>> XLSXProcessSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException { //将输入流包装成XML源 InputSource sheetSource = new InputSource(sheetInputStream); //构造基于标签回调的XMLReader SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); //构造XLSXReader XLSXReader handler = new XLSXReader(styles, strings); //添加管理者 sheetParser.setContentHandler(handler); //读入XML源 sheetParser.parse(sheetSource); return handler.rows; } /** * 读取Excel2007文件 * @param filePath 文件路径 * @return * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ private List<List<String>> XLSXprocess(String filePath) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { //将文件以压缩包的形式读入 OPCPackage opcPackage = OPCPackage.open(filePath, PackageAccess.READ); ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcPackage); XSSFReader xssfReader = new XSSFReader(opcPackage); List<List<String>> sheet = null; //读入样式信息 StylesTable styles = xssfReader.getStylesTable(); //构造sheet迭代器 XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); //这里只读入了第一个sheet,如果需要读取多个可以遍历这个迭代器 if (iter.hasNext()) { InputStream stream = iter.next(); //构造XLSXReader读取内容 sheet = XLSXProcessSheet(styles, strings, stream); //关闭流 stream.close(); } opcPackage.close(); return sheet; } /** * 读取Excel2003文件 * @param filePath 文件路径 * @return * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ private List<List<String>> XLSprocess(String filePath) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { XLSReader xls = new XLSReader(); List<List<String>> sheet = xls.process(filePath); return sheet; } /** * 读取Excel文件内容并返回包含文件内容的List * @param filePath 文件路径 * @return * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public static List<List<String>> readerExcel(String filePath) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { // 将文件数据储存起来 List<List<String>> sheet = null; // 获取文件后缀名,判断文件类型 int index = filePath.lastIndexOf("."); String excelVer = filePath.substring(index, filePath.length()); ExcelEventParser csvReader = new ExcelEventParser(); if (XLS.equals(excelVer)) { sheet = csvReader.XLSprocess(filePath); } else if (XLSX.equals(excelVer)) { sheet = csvReader.XLSXprocess(filePath); } return sheet; } public static void main(String[] args) { try { // 要导入的文件地址(例:D:/新建 Microsoft Excel 工作表 .xlsx) String filePath = "D:/新建 Microsoft Excel 工作表 .xls"; List<List<String>> sheet = readerExcel(filePath); for (List<String> row : sheet) { for (String cell : row) { System.out.print(cell + ","); } System.out.println(); } } catch (Exception e) { e.printStackTrace(); } } } 

代码中按照惯例,将文件内容全部读出储存起来了,其实并不必要;使用时可以结合实际情况修改。

2.1、读取行数

读取Excel时常常会需要在读取之前先判断行数,虽然用户模式直接提供了获取行数的方法,但显然这里是不能使用的;下面读取行数的方法就是简单简化而来,主要考虑资源占用和速度。

import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.RowRecord; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; public class MaxRowHandler { public static final String Excel_03 = ".xls"; public static final String Excel_07 = ".xlsx"; class XLSXMaxRowHandler extends DefaultHandler { //最大行数 private int maxRow = 0; private int cellNum = 0; public void endElement(String uri, String localName, String name) throws SAXException { if ("row".equals(name)) { if (cellNum > 0) { maxRow++; } cellNum = 0; } } } class XLSMaxRowHandler implements HSSFListener { private POIFSFileSystem fs; //最大行数 private int maxRow = 0; public int process(String filePath) throws IOException { this.fs = new POIFSFileSystem(new FileInputStream(filePath)); MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); //只监听RowRecord一种 request.addListener(listener, RowRecord.sid); factory.processWorkbookEvents(request, fs); return maxRow; } /** * 处理Excel2003文件并计算行数 * @param fileName 文件路径 * @return * @throws IOException */ public void processRecord(Record record) { if (record.getSid() == RowRecord.sid) { maxRow++; } } } public MaxRowHandler() { } /** * 读取Excel2003文件最大行数 * @param filePath 文件路径 * @return * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public int maxXLSprocess(String filePath) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { XLSMaxRowHandler xls = new XLSMaxRowHandler(); int num = xls.process(filePath); return num; } /** * 读取Excel2007文件最大行数 * @param filePath 文件路径 * @return * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ private int maxXLSXprocess(String filePath) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { //将文件以压缩包的形式读入 OPCPackage opcPackage = OPCPackage.open(filePath, PackageAccess.READ); XSSFReader xssfReader = new XSSFReader(opcPackage); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); InputStream stream = iter.next(); InputSource sheetSource = new InputSource(stream); SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); XLSXMaxRowHandler handler = new XLSXMaxRowHandler(); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); int num = handler.maxRow; stream.close(); opcPackage.close(); return num; } /** * 获取最大行数不再区分sheet和空行等 * @param filePath 文件路径 * @return */ public static int getMaxRow(String filePath) { int index = filePath.lastIndexOf("."); String excelVer = filePath.substring(index, filePath.length()); int num = 0; try { MaxRowHandler csvReader = new MaxRowHandler(); if (".xls".equals(excelVer)) { num = csvReader.maxXLSprocess(filePath); } else if (".xlsx".equals(excelVer)) { num = csvReader.maxXLSXprocess(filePath); } return num; }catch(Exception e) { e.printStackTrace(); } return 0; } public static void main(String[] args) { try { // 要导入的文件地址(例:D:/新建 Microsoft Excel 工作表 .xlsx) String filePath = "D:/新建 Microsoft Excel 工作表 .xlsx"; int max = getMaxRow(filePath); System.out.println(max); } catch (Exception e) { e.printStackTrace(); } } }

3、后记

至此POI事件模式读取Excel文件就介绍完了,但提高读取性能还远不止这些;POI的SXSSF(流用户模式),阿里巴巴开源的easyexcel工具也是各有特点,非常值得一试。

本文代码较多,受限于作者水平,难免有缺漏之处。如果有疑问可以在评论区交流,如果对大家有所帮助就再好不过了。

原文链接:https://yq.aliyun.com/articles/691249
关注公众号

低调大师中文资讯倾力打造互联网数据资讯、行业资源、电子商务、移动互联网、网络营销平台。

持续更新报道IT业界、互联网、市场资讯、驱动更新,是最及时权威的产业资讯及硬件资讯报道平台。

转载内容版权归作者及来源网站所有,本站原创内容转载请注明来源。

文章评论

共有0条评论来说两句吧...

文章二维码

扫描即可查看该文章

点击排行

推荐阅读

最新文章