import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 * Reads Excel workbooks (.xls and .xlsx) into nested lists of cell values using Apache POI.
 */
public class ReadExcelUtil {

    /**
     * Reads the leading sheets of an Excel file.
     *
     * @param inputStream stream holding the workbook content
     * @param fileName    full file name; its extension selects the workbook implementation
     * @param sheetNum    how many sheets to read, counted from the first sheet (use the overload
     *                    without this parameter to read every sheet, which is not recommended)
     * @param flag        {@code true} reads each cell according to its cell type, {@code false}
     *                    reads every cell through {@code Cell.toString()} (recommended)
     * @param r           first row number to read (compared against {@code Row.getRowNum()})
     * @param c           first column index to read (compared against {@code Cell.getColumnIndex()})
     * @return a map keyed by sheet name; each value is the ordered list of rows of that sheet,
     *         and each row is the ordered list of its cell values. The map is empty when the
     *         workbook could not be opened.
     */
    public Map<String, List<List<Object>>> readExcel(InputStream inputStream, String fileName, int sheetNum, boolean flag, int r, int c) {
        Workbook wb = getExcelKind(inputStream, fileName);
        if (wb == null) {
            // getExcelKind already reported the problem; avoid a NullPointerException downstream.
            return new HashMap<String, List<List<Object>>>();
        }
        return readSheet(wb, sheetNum, flag, r, c);
    }

    /**
     * Reads every sheet of an Excel file.
     *
     * @param inputStream stream holding the workbook content
     * @param fileName    full file name; its extension selects the workbook implementation
     * @param flag        {@code true} reads each cell according to its cell type, {@code false}
     *                    reads every cell through {@code Cell.toString()}
     * @param r           first row number to read
     * @param c           first column index to read
     * @return a map keyed by sheet name (see the other overload); empty when the workbook
     *         could not be opened
     */
    public Map<String, List<List<Object>>> readExcel(InputStream inputStream, String fileName, boolean flag, int r, int c) {
        Workbook wb = getExcelKind(inputStream, fileName);
        if (wb == null) {
            return new HashMap<String, List<List<Object>>>();
        }
        return readSheet(wb, flag, r, c);
    }

    /**
     * Reads the first {@code sheetNum} sheets of a workbook.
     *
     * @param wb       workbook to read
     * @param sheetNum number of sheets to read, starting with the first one
     * @param flag     forwarded to {@link #readCell(Cell, boolean)}
     * @param r        first row number to read
     * @param c        first column index to read
     * @return a map from sheet name to the rows read from that sheet
     */
    public Map<String, List<List<Object>>> readSheet(Workbook wb, int sheetNum, boolean flag, int r, int c) {
        Map<String, List<List<Object>>> map = new HashMap<String, List<List<Object>>>();
        int sheetsRead = 0;
        for (Sheet sheet : wb) {
            if (sheetsRead >= sheetNum) {
                break;
            }
            map.put(sheet.getSheetName(), readRow(sheet, flag, r, c));
            sheetsRead++;
        }
        return map;
    }

    /**
     * Reads every sheet of a workbook.
     *
     * @param wb   workbook to read
     * @param flag forwarded to {@link #readCell(Cell, boolean)}
     * @param r    first row number to read
     * @param c    first column index to read
     * @return a map from sheet name to the rows read from that sheet
     */
    public Map<String, List<List<Object>>> readSheet(Workbook wb, boolean flag, int r, int c) {
        Map<String, List<List<Object>>> map = new HashMap<String, List<List<Object>>>();
        for (Sheet sheet : wb) {
            // Stored under the sheet's name.
            map.put(sheet.getSheetName(), readRow(sheet, flag, r, c));
        }
        return map;
    }

    /**
     * Walks a sheet row by row and cell by cell, collecting the values at or after the
     * requested start row and start column.
     *
     * @param sheet sheet to read
     * @param flag  forwarded to {@link #readCell(Cell, boolean)}
     * @param r     first row number to read; rows before it are skipped entirely
     * @param c     first column index to read; cells before it are skipped
     * @return one list per row that was read, each holding that row's cell values in order
     */
    public List<List<Object>> readRow(Sheet sheet, boolean flag, int r, int c) {
        List<List<Object>> rows = new ArrayList<List<Object>>();
        for (Row row : sheet) {
            if (row.getRowNum() < r) {
                // Rows before the start row contribute nothing (not even an empty list).
                continue;
            }
            List<Object> values = new ArrayList<Object>();
            for (Cell cell : row) {
                if (cell.getColumnIndex() >= c) {
                    values.add(readCell(cell, flag));
                }
            }
            rows.add(values);
        }
        return rows;
    }

    /**
     * Reads the value of a single cell.
     *
     * @param cell cell to read
     * @param flag {@code true} converts by cell type, {@code false} returns {@code cell.toString()}
     * @return with {@code flag == true}: a date for date-formatted numeric cells, a number for
     *         other numeric cells, the text of string cells, the formula text of formula cells,
     *         a boolean for boolean cells, the literal {@code "cell ERROR"} for error cells and
     *         {@code null} for blank cells; with {@code flag == false}: {@code cell.toString()}
     */
    public Object readCell(Cell cell, boolean flag) {
        if (!flag) {
            return cell.toString();
        }
        Object obj = null;
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    obj = cell.getDateCellValue();
                } else {
                    obj = cell.getNumericCellValue();
                }
                break;
            case Cell.CELL_TYPE_STRING:
                obj = cell.getRichStringCellValue().getString();
                break;
            case Cell.CELL_TYPE_FORMULA:
                obj = cell.getCellFormula();
                break;
            case Cell.CELL_TYPE_BLANK:
                // TODO: blank cells are reported as null; a better representation may exist.
                break;
            case Cell.CELL_TYPE_BOOLEAN:
                obj = cell.getBooleanCellValue();
                break;
            case Cell.CELL_TYPE_ERROR:
                obj = "cell ERROR";
                break;
            default:
                break;
        }
        return obj;
    }

    /**
     * Creates the workbook implementation that matches the file name's extension.
     *
     * @param file     stream holding the workbook content
     * @param fileName file name whose extension ("xlsx" or "xls", matched case-insensitively)
     *                 selects the implementation
     * @return the opened workbook, or {@code null} when {@code fileName} is {@code null}, the
     *         extension is not recognised, or opening failed (the failure is printed)
     */
    public Workbook getExcelKind(InputStream file, String fileName) {
        if (fileName == null) {
            return null;
        }
        String suffix = fileName.substring(fileName.lastIndexOf('.') + 1);
        // WorkbookFactory.create(file) alone could replace the branches below; the explicit
        // format-specific code is kept on purpose to exercise those APIs.
        Workbook wb = null;
        if ("xlsx".equalsIgnoreCase(suffix)) { // Excel 2007 and later
            try {
                OPCPackage pkg = OPCPackage.open(file);
                try {
                    wb = new XSSFWorkbook(pkg);
                } finally {
                    // Close the package even when the workbook constructor fails.
                    pkg.close();
                }
            } catch (InvalidFormatException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else if ("xls".equalsIgnoreCase(suffix)) { // Excel 2003 and earlier
            try {
                NPOIFSFileSystem fs = new NPOIFSFileSystem(file);
                try {
                    wb = new HSSFWorkbook(fs.getRoot(), true);
                } finally {
                    // Close the file system even when the workbook constructor fails.
                    fs.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            System.out.println("没找到对应的后缀名,检查后再试试呗。");
            return null;
        }
        return wb;
    }
}
只是一个小小的例子,本想着能把这个方法做成普适的,可惜总有几个牛角尖钻不透,智商着急啊。
=====================================第二版=====================================
import com.google.common.collect.Lists;
import com.vastio.exception.PlatformException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List; public class ReadExcelFile { private static final Logger LOGGER = LoggerFactory.getLogger(ReadExcelFile.class.getName()); public static void main(String[] args) {
String path = "C:\\test.xls";
System.out.println(readExcel(path));
} /**
* 读取Excel文件并将数据放到list中
*
* @param path 文件的绝对路径
* @return list
*/
public static List<List<List<String>>> readExcel(String path) {
List<List<List<String>>> sheetList = Lists.newArrayList();
try {
Workbook workbook = WorkbookFactory.create(new FileInputStream(path));
int sheetNum = workbook.getNumberOfSheets(); // sheet数目
List<List<String>> rowList = Lists.newArrayList();
for (int i = 0; i < sheetNum; i++) {
Sheet sheet = workbook.getSheetAt(i);
int rowNum = sheet.getPhysicalNumberOfRows(); // 总行数
for (int r = 0; r < rowNum; r++) {
int cellNum = sheet.getRow(r).getPhysicalNumberOfCells(); // 每一行包含的总格子数
List<String> cellList = Lists.newArrayList();
for (short c = 0; c < cellNum; c++) {
Cell cell = sheet.getRow(r).getCell(c);
String value;
if (cell == null) continue;
switch (cell.getCellTypeEnum()) {
case FORMULA: // 公式
value = String.valueOf(cell.getCellFormula());
cellList.add(value);
break;
case NUMERIC: // 数值
value = String.valueOf(cell.getNumericCellValue());
cellList.add(value);
break;
case STRING: // 字符串
value = String.valueOf(cell.getStringCellValue());
cellList.add(value);
break;
case BLANK: // 空白
value = "";
cellList.add(value);
break;
case BOOLEAN:
value = String.valueOf(cell.getBooleanCellValue());
cellList.add(value);
break;
case ERROR:
value = String.valueOf(cell.getErrorCellValue());
cellList.add(value);
break;
case _NONE:
value = "error at : " + cell.getAddress();
throw new PlatformException(value); // 自定义的异常类
default:
break;
}
}
rowList.add(cellList);
}
sheetList.add(rowList);
}
} catch (IOException e) {
LOGGER.debug(e.getMessage());
} catch (InvalidFormatException e) {
LOGGER.debug(e.getMessage());
} catch (PlatformException e) {
LOGGER.debug(e.getMessage());
}
return sheetList;
}
}
这个是简易版,自定义的成分居多。
需要引入的依赖如下(Gradle 依赖坐标):
'org.apache.poi:poi:3.15',
'org.apache.poi:poi-ooxml:3.15',
'com.github.virtuald:curvesapi:1.04',
'commons-codec:commons-codec:1.10',
'org.apache.poi:poi-ooxml-schemas:3.15',
'org.apache.commons:commons-collections4:4.1'
构建工具为gradle。