I am new to Apache POI, and I want to split an Excel file into multiple files based on row count.
我是apache poi的新手,我想基于行数将excel文件分割成多个文件。
E.g. data.xlsx has 15k rows; the new files should be data_1.xlsx with rows 0-5k, data_2.xlsx with rows 5k-10k, and data_3.xlsx with rows 10k-15k.
例如,data.xlsx有15k行,新文件应该是:data_1.xlsx包含第0-5k行,data_2.xlsx包含第5k-10k行,data_3.xlsx包含第10k-15k行。
1 个解决方案
#1
5
I've got you.
我有你。
package com.industries.seanimus;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Splits the first sheet of an .xlsx workbook into several workbooks holding at
 * most {@code maxRows} rows each.
 *
 * <p>The parts are written next to the source file as {@code <name>_1.xlsx},
 * {@code <name>_2.xlsx}, and so on. All work is done by the constructor. I/O and
 * format errors are printed to standard error rather than propagated.
 */
public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    /**
     * Reads {@code fileName} and, if its first sheet has at least {@code maxRows}
     * physical rows, writes the split parts to disk.
     *
     * @param fileName path of the source .xlsx file
     * @param maxRows  maximum number of rows per output file; must be at least 1
     * @throws IllegalArgumentException if {@code maxRows} is smaller than 1
     */
    public ReportSplitter(String fileName, final int maxRows) {
        if (maxRows < 1) {
            throw new IllegalArgumentException("maxRows must be at least 1, got " + maxRows);
        }
        /*
         * Turns off POI's zip-bomb check so that large, highly compressible
         * workbooks can be opened. Only use this class on trusted input.
         */
        ZipSecureFile.setMinInflateRatio(0);
        this.fileName = fileName;
        this.maxRows = maxRows;
        try {
            /* Open read-only so the source file is never written back. */
            OPCPackage pkg = OPCPackage.open(new File(fileName), PackageAccess.READ);
            try {
                XSSFWorkbook workbook = new XSSFWorkbook(pkg);
                XSSFSheet sheet = workbook.getSheetAt(0);
                /* Only split if there are at least maxRows rows. */
                if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                    List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                    writeWorkBooks(wbs);
                }
            } finally {
                /* revert() releases the package without saving anything. */
                pkg.revert();
            }
        } catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the rows of the first sheet into a sequence of streaming workbooks,
     * starting a new workbook whenever the current one already holds
     * {@code maxRows} rows.
     *
     * @param workbook the source workbook
     * @return the generated workbooks in source order; never contains an empty workbook
     */
    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {
        List<SXSSFWorkbook> workbooks = new ArrayList<>();
        /* Source style index -> cloned style in the current target workbook. */
        Map<Integer, CellStyle> styles = new HashMap<>();
        SXSSFWorkbook wb = null;
        SXSSFSheet sh = null;
        int rowCount = 0;

        for (Row row : workbook.getSheetAt(0)) {
            /*
             * Roll over BEFORE creating the row so that the row, its cells and
             * their styles all belong to the same target workbook.
             */
            if (wb == null || rowCount == maxRows) {
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                workbooks.add(wb);
                styles.clear();
                rowCount = 0;
            }
            Row newRow = sh.createRow(rowCount++);
            for (Cell cell : row) {
                /* Keep the original column so gaps in sparse rows are preserved. */
                Cell newCell = newRow.createCell(cell.getColumnIndex());
                setValue(newCell, cell);
                newCell.setCellStyle(styleFor(wb, styles, cell.getCellStyle()));
            }
        }
        return workbooks;
    }

    /**
     * Returns the clone of {@code source} inside {@code wb}, creating it on first
     * use. Reusing one clone per distinct source style keeps the number of
     * styles in each output workbook small.
     */
    private CellStyle styleFor(SXSSFWorkbook wb, Map<Integer, CellStyle> styles, CellStyle source) {
        Integer key = Integer.valueOf(source.getIndex());
        CellStyle copy = styles.get(key);
        if (copy == null) {
            copy = wb.createCellStyle();
            copy.cloneStyleFrom(source);
            styles.put(key, copy);
        }
        return copy;
    }

    /**
     * Copies the content of {@code cell} into {@code newCell} according to the
     * source cell's type. Formulas are copied as text; their references are not
     * rewritten for the new row position.
     *
     * @return {@code newCell}, for convenience
     */
    private Cell setValue(Cell newCell, Cell cell) {
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_STRING:
                newCell.setCellValue(cell.getRichStringCellValue().getString());
                break;
            case Cell.CELL_TYPE_NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    newCell.setCellValue(cell.getDateCellValue());
                } else {
                    newCell.setCellValue(cell.getNumericCellValue());
                }
                break;
            case Cell.CELL_TYPE_BOOLEAN:
                newCell.setCellValue(cell.getBooleanCellValue());
                break;
            case Cell.CELL_TYPE_FORMULA:
                newCell.setCellFormula(cell.getCellFormula());
                break;
            case Cell.CELL_TYPE_BLANK:
                /* Nothing to copy; the freshly created cell is already blank. */
                break;
            case Cell.CELL_TYPE_ERROR:
                newCell.setCellErrorValue(cell.getErrorCellValue());
                break;
            default:
                System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /**
     * Writes every workbook to {@code <base>_<n>.xlsx}, where {@code <base>} is
     * the source file name without its extension and {@code n} starts at 1.
     * Writing stops at the first I/O error, which is printed to standard error.
     * The temporary files of all workbooks are released in either case.
     */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        String baseName = stripExtension(fileName);
        try {
            for (int i = 0; i < wbs.size(); i++) {
                File target = new File(baseName + "_" + (i + 1) + ".xlsx");
                try (FileOutputStream out = new FileOutputStream(target)) {
                    wbs.get(i).write(out);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            /* SXSSF spools rows to temp files; dispose() deletes them. */
            for (SXSSFWorkbook wb : wbs) {
                wb.dispose();
            }
        }
    }

    /**
     * Removes a trailing {@code .ext} from the last path segment, if present.
     */
    private static String stripExtension(String path) {
        int dot = path.lastIndexOf('.');
        int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        return dot > separator ? path.substring(0, dot) : path;
    }

    public static void main(String[] args) {
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }
}
A few notes:
一些笔记:
-
For writing the workbooks, I use SXSSFWorkbook. It's a lot faster than HSSF or XSSF, as it doesn't hold everything in memory before writing (which causes a horrible gc mess).
为了编写工作簿,我使用SXSSFWorkbook。它比HSSF或XSSF要快得多,因为在编写之前它不会保存所有内存(这会导致可怕的gc混乱)。
-
The Busy Developer's Guide is your friend for learning Apache POI ;)
忙碌的开发人员指南是您学习Apache POI的朋友;)
ENJOY!
享受吧!
EDIT: I've updated the code to copy cell styles as well. Two things to note about this:
- Copying styles will SLOW things down considerably.
- 复制样式会大大降低速度。
- POI creates a template file that may become too big to be uncompressed, throwing a "Zip bomb detected" error. You can work around this by lowering the minimum inflate ratio via ZipSecureFile.setMinInflateRatio(0). Note that this switches off POI's zip-bomb check, so only do it for files you trust.
- POI创建的模板文件可能会变得过大而无法解压,从而抛出“Zip bomb detected”(检测到Zip炸弹)错误。您可以通过ZipSecureFile.setMinInflateRatio(0)调低最小解压比率来解决这个问题。请注意,这会关闭POI的Zip炸弹检查,因此只应对可信的文件这样做。
#1
5
I've got you.
我有你。
package com.industries.seanimus;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Splits the first sheet of an .xlsx workbook into several workbooks holding at
 * most {@code maxRows} rows each.
 *
 * <p>The parts are written next to the source file as {@code <name>_1.xlsx},
 * {@code <name>_2.xlsx}, and so on. All work is done by the constructor. I/O and
 * format errors are printed to standard error rather than propagated.
 */
public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    /**
     * Reads {@code fileName} and, if its first sheet has at least {@code maxRows}
     * physical rows, writes the split parts to disk.
     *
     * @param fileName path of the source .xlsx file
     * @param maxRows  maximum number of rows per output file; must be at least 1
     * @throws IllegalArgumentException if {@code maxRows} is smaller than 1
     */
    public ReportSplitter(String fileName, final int maxRows) {
        if (maxRows < 1) {
            throw new IllegalArgumentException("maxRows must be at least 1, got " + maxRows);
        }
        /*
         * Turns off POI's zip-bomb check so that large, highly compressible
         * workbooks can be opened. Only use this class on trusted input.
         */
        ZipSecureFile.setMinInflateRatio(0);
        this.fileName = fileName;
        this.maxRows = maxRows;
        try {
            /* Open read-only so the source file is never written back. */
            OPCPackage pkg = OPCPackage.open(new File(fileName), PackageAccess.READ);
            try {
                XSSFWorkbook workbook = new XSSFWorkbook(pkg);
                XSSFSheet sheet = workbook.getSheetAt(0);
                /* Only split if there are at least maxRows rows. */
                if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                    List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                    writeWorkBooks(wbs);
                }
            } finally {
                /* revert() releases the package without saving anything. */
                pkg.revert();
            }
        } catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the rows of the first sheet into a sequence of streaming workbooks,
     * starting a new workbook whenever the current one already holds
     * {@code maxRows} rows.
     *
     * @param workbook the source workbook
     * @return the generated workbooks in source order; never contains an empty workbook
     */
    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {
        List<SXSSFWorkbook> workbooks = new ArrayList<>();
        /* Source style index -> cloned style in the current target workbook. */
        Map<Integer, CellStyle> styles = new HashMap<>();
        SXSSFWorkbook wb = null;
        SXSSFSheet sh = null;
        int rowCount = 0;

        for (Row row : workbook.getSheetAt(0)) {
            /*
             * Roll over BEFORE creating the row so that the row, its cells and
             * their styles all belong to the same target workbook.
             */
            if (wb == null || rowCount == maxRows) {
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                workbooks.add(wb);
                styles.clear();
                rowCount = 0;
            }
            Row newRow = sh.createRow(rowCount++);
            for (Cell cell : row) {
                /* Keep the original column so gaps in sparse rows are preserved. */
                Cell newCell = newRow.createCell(cell.getColumnIndex());
                setValue(newCell, cell);
                newCell.setCellStyle(styleFor(wb, styles, cell.getCellStyle()));
            }
        }
        return workbooks;
    }

    /**
     * Returns the clone of {@code source} inside {@code wb}, creating it on first
     * use. Reusing one clone per distinct source style keeps the number of
     * styles in each output workbook small.
     */
    private CellStyle styleFor(SXSSFWorkbook wb, Map<Integer, CellStyle> styles, CellStyle source) {
        Integer key = Integer.valueOf(source.getIndex());
        CellStyle copy = styles.get(key);
        if (copy == null) {
            copy = wb.createCellStyle();
            copy.cloneStyleFrom(source);
            styles.put(key, copy);
        }
        return copy;
    }

    /**
     * Copies the content of {@code cell} into {@code newCell} according to the
     * source cell's type. Formulas are copied as text; their references are not
     * rewritten for the new row position.
     *
     * @return {@code newCell}, for convenience
     */
    private Cell setValue(Cell newCell, Cell cell) {
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_STRING:
                newCell.setCellValue(cell.getRichStringCellValue().getString());
                break;
            case Cell.CELL_TYPE_NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    newCell.setCellValue(cell.getDateCellValue());
                } else {
                    newCell.setCellValue(cell.getNumericCellValue());
                }
                break;
            case Cell.CELL_TYPE_BOOLEAN:
                newCell.setCellValue(cell.getBooleanCellValue());
                break;
            case Cell.CELL_TYPE_FORMULA:
                newCell.setCellFormula(cell.getCellFormula());
                break;
            case Cell.CELL_TYPE_BLANK:
                /* Nothing to copy; the freshly created cell is already blank. */
                break;
            case Cell.CELL_TYPE_ERROR:
                newCell.setCellErrorValue(cell.getErrorCellValue());
                break;
            default:
                System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /**
     * Writes every workbook to {@code <base>_<n>.xlsx}, where {@code <base>} is
     * the source file name without its extension and {@code n} starts at 1.
     * Writing stops at the first I/O error, which is printed to standard error.
     * The temporary files of all workbooks are released in either case.
     */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        String baseName = stripExtension(fileName);
        try {
            for (int i = 0; i < wbs.size(); i++) {
                File target = new File(baseName + "_" + (i + 1) + ".xlsx");
                try (FileOutputStream out = new FileOutputStream(target)) {
                    wbs.get(i).write(out);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            /* SXSSF spools rows to temp files; dispose() deletes them. */
            for (SXSSFWorkbook wb : wbs) {
                wb.dispose();
            }
        }
    }

    /**
     * Removes a trailing {@code .ext} from the last path segment, if present.
     */
    private static String stripExtension(String path) {
        int dot = path.lastIndexOf('.');
        int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        return dot > separator ? path.substring(0, dot) : path;
    }

    public static void main(String[] args) {
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }
}
A few notes:
一些笔记:
-
For writing the workbooks, I use SXSSFWorkbook. It's a lot faster than HSSF or XSSF, as it doesn't hold everything in memory before writing (which causes a horrible gc mess).
为了编写工作簿,我使用SXSSFWorkbook。它比HSSF或XSSF要快得多,因为在编写之前它不会保存所有内存(这会导致可怕的gc混乱)。
-
The Busy Developer's Guide is your friend for learning Apache POI ;)
忙碌的开发人员指南是您学习Apache POI的朋友;)
ENJOY!
享受吧!
EDIT: I've updated the code to copy cell styles as well. Two things to note about this:
- Copying styles will SLOW things down considerably.
- 复制样式会大大降低速度。
- POI creates a template file that may become too big to be uncompressed, throwing a "Zip bomb detected" error. You can work around this by lowering the minimum inflate ratio via ZipSecureFile.setMinInflateRatio(0). Note that this switches off POI's zip-bomb check, so only do it for files you trust.
- POI创建的模板文件可能会变得过大而无法解压,从而抛出“Zip bomb detected”(检测到Zip炸弹)错误。您可以通过ZipSecureFile.setMinInflateRatio(0)调低最小解压比率来解决这个问题。请注意,这会关闭POI的Zip炸弹检查,因此只应对可信的文件这样做。