java转换文本文件到xlsx（自制缓冲区，无需先验文件行数）

【0】README

0.1）本文代码利用了 Apache POI 框架建立 java 到 xlsx 代码的联系；

0.2）本文自制缓冲区从文本文件中读取数据读取，无需先验文件行数；

0.3）本文通过缓冲区大小创建数组，数组容量自动增加，该raw idea 来自于 tomcat 源码中在容器关联管道增加非基础阀的处理方式，包括Session池容量的增加也是这种处理方式；

0.4）for complete source code, please visit https://github.com/pacosonTang/postgraduate-research/tree/master/DataProcess；

【1】如何自制缓冲区读取数据

step1）先建立一个缓冲大小确定（capacity）的二维数组指针data；

step2）通过循环读取数据，并填充二维数组；

step3）判断二维数组大小是否等于原定的缓冲大小（capacity）；

step3.1）若等于：则增大缓冲区大小为 newcapacity，并建立容量为 newcapacity 的二维数组指针datacopy，将原来的data数组的内容copy或填充到datacopy，并使得data指针的指向等于datacopy的指向；

step3.2）若不等于：继续下一步循环；

step4）循环完毕后，建立容量为 newcapacity 的二维数组指针datacopy，将原来的data数组的内容copy或填充到datacopy，并使得data指针的指向等于datacopy的指向；（bingo）

Attention）

A1）本文的capacity设定为10，你可以设定其他值，视具体情况而定；

A2）本文代码在某些地方吧 data设置为null，是为了便于 jvm 回收其内存；

// the core code begins.
		int capacity=10; // buffer size, of course you can specify other values of capacity. step1		
		double[][] data = new double[capacity][];
		int lineNum = 0;
		
		while((str=reader.readLine())!=null) { // step2
			String[] array = str.split(",");
			double[] temp = new double[array.length-1];
			
			for (int i = 0; i < array.length-1; i++) {
				temp[i] = Double.valueOf(array[i]);
			}
			if(lineNum%capacity==0) {  // step3
				double[][] datacopy = new double[lineNum+capacity][]; // step3.1
				for (int i = 0; i < data.length; i++) {
					datacopy[i] = data[i];
				}
				data = null;
				data = datacopy;				
			} 
			data[lineNum++] = temp;	// step3.2	
		}
		double[][] datacopy = new double[lineNum][]; // step4.
		for (int i = 0; i < datacopy.length; i++) {
			datacopy[i] = data[i];
		}
		data = null;
		data = datacopy;
		// the core code ends.<span style="font-family: SimSun; background-color: rgb(255, 255, 255);"> </span>

【2】intro to Apache POI

1）for downloading poi lib , please visit http://poi.apache.org/download.html，but you can also download the libs of mine Apache POI lib.

2）POI 将 xlsx 文件抽象为 Workbook对象，将xlsx文件中每张工作表抽象为 Sheet对象，代码如下：

public class XlsxDataModel
{
	private Workbook workbook;
	private Sheet sheet;
	static XlsxDataModel model;

	private XlsxDataModel(String sheetName){
		this.workbook = new XSSFWorkbook();;
		this.sheet = workbook.createSheet(sheetName);
	}
	
	public static XlsxDataModel getInstance(String sheetName){
		if(model == null){
			model = new XlsxDataModel(sheetName);
		}
		return model;
	}

	public Workbook getWorkbook() {
		return workbook;
	}

	public Sheet getSheet() {
		return sheet;
	}
}

3）POI 将每一行抽象为Row对象，将每个单元格抽象为 Cell对象，这样就可以定位到每个单元格了，部分代码如下：

XlsxDataModel model = XlsxDataModel.getInstance(sheetName);
Row row = model.getSheet().createRow(0);
Cell cell = row.createCell(0);
cell.setCellValue("");

4）POI还从Sheet对象中抽象出遍历每行的迭代器，代码如下（从xlsx读取数据需要迭代器）：

// 获取数据行 迭代器
	final Iterator<Row> readIterator() {
		Iterator<Row> itr = null;
		try {
			File excel = new File(this.dataPath);
			FileInputStream fis = new FileInputStream(excel);
			// 创建工作簿
			XSSFWorkbook book = new XSSFWorkbook(fis);
			// 创建工作簿下的第一页纸张
			XSSFSheet sheet = book.getSheetAt(0);

			// 纸张的迭代器，用于遍历行
			itr = sheet.iterator();
			// Iterating over Excel file in Java

		} catch (Exception e) {
			// TODO: handle exception
			e.printStackTrace();
		}
		return itr;
	}

【3】将【1】中的idea 同 POI 结合起来

0）本文重点讲解写数据（写入到xlsx文件），读数据（从xlsx文件中读数据）只是po出方法而已；

1）目的：从文本文件中读取数据，并转化为xlsx文件格式；

step1）从文本读取数据；

step2）将存储文本数据的二维数组写入到xlsx；

public class DataProcess { // source code for step1 begins.
	public static void main(String[] args) throws IOException {
		String basedir = System.getProperty("user.dir") + File.separator;
		BufferedReader reader = 
				new BufferedReader(new InputStreamReader(
						new FileInputStream(basedir+"iris_data.txt")));
		String str;
		
		// the core code begins.
		int capacity=10; // buffer size, of course you can specify other values of capacity.		
		double[][] data = new double[capacity][];
		int lineNum = 0;
		
		while((str=reader.readLine())!=null) {
			String[] array = str.split(",");
			double[] temp = new double[array.length-1];
			
			for (int i = 0; i < array.length-1; i++) {
				temp[i] = Double.valueOf(array[i]);
			}
			if(lineNum%capacity==0) { 
				double[][] datacopy = new double[lineNum+capacity][];
				for (int i = 0; i < data.length; i++) {
					datacopy[i] = data[i];
				}
				data = null;
				data = datacopy;				
			} 
			data[lineNum++] = temp;			
		}
		double[][] datacopy = new double[lineNum][];
		for (int i = 0; i < datacopy.length; i++) {
			datacopy[i] = data[i];
		}
		data = null;
		data = datacopy;
		// source code for step1 ends.
		
		DataWrite writer = new DataWrite(basedir+"gmeans_irise1.xlsx");// source code for step2 begins.
		writer.writeArray(data, "item", data[0].length, "iris"); // source code for step2 ends. highlight line.
	}
}

/**
 * @author Rong Tang
 * @version 1.0
 * @since 20150911
 */
public class DataWrite {
	private String filepath;
	private FileOutputStream out;

	public DataWrite(String filepath) {
		this.filepath = filepath;
	}

	/**
	 * @param data is a double array storing data written into xlsx.
	 * @param colPrefix is a row tag.
	 * @param headColNum is column number.
	 * @param sheetName is the name of sheet.
	 * @throws IOException
	 */
	public void writeArray(double[][] data, String colPrefix, int headColNum, String sheetName)
			throws IOException {
		XlsxDataModel model = XlsxDataModel.getInstance(sheetName);
		Row row = model.getSheet().createRow(0);
		Cell cell = row.createCell(0);
		cell.setCellValue("");

		for (int i = 1; i <= headColNum; i++) {
			cell = row.createCell(i);
			cell.setCellValue(i);
		} // build the head line over

		for (int i = 0; i < data.length; i++) {
			row = model.getSheet().createRow(i + 1);

			cell = row.createCell(0);
			cell.setCellValue(colPrefix + (i + 1));
			for (int j = 0; j < data[i].length; j++) {
				cell = row.createCell(j + 1);
				cell.setCellValue(data[i][j]);
			}
		}// write the cluster result(centroid vector) into xlsx over

		out = new FileOutputStream(filepath);
		model.getWorkbook().write(out);
		out.flush();
		out.close();

		System.out.println("write " + filepath + " over");
	}
	
	/**
	 * @param data is a int array storing data written into xlsx.
	 * @param colPrefix is a row tag.
	 * @param headColNum is column number.
	 * @param sheetName is the name of sheet.
	 * @throws IOException
	 */
	public void writeArray(int[][] data, String colPrefix, int headColNum, String sheetName)
			throws IOException {
		XlsxDataModel model = XlsxDataModel.getInstance(sheetName);
		Row row = model.getSheet().createRow(0);
		Cell cell = row.createCell(0);
		cell.setCellValue("");

		for (int i = 1; i <= headColNum; i++) {
			cell = row.createCell(i);
			cell.setCellValue(i);
		} // build the head line over

		for (int i = 0; i < data.length; i++) {
			row = model.getSheet().createRow(i + 1);

			cell = row.createCell(0);
			cell.setCellValue(colPrefix + (i + 1));
			for (int j = 0; j < data[i].length; j++) {
				cell = row.createCell(j + 1);
				cell.setCellValue(data[i][j]);
			}
		}// write the cluster result(centroid vector) into xlsx over

		out = new FileOutputStream(filepath);
		model.getWorkbook().write(out);
		out.flush();
		out.close();

		System.out.println("write " + filepath + " over");
	}
}

2）本文象征性的po 出读数据方法

/**
 * @author Rong Tang
 * @version 1.0
 * @since 20150911
 */
public class DataRead {
	private String dataPath;

	public DataRead(String dataPath) {
		this.dataPath = dataPath;
	}
	
	/**
	 * 
	 * @param row_start is a startup row startup index for reading. 
	 * @param col_start is a startup column index for reading.
	 * @param array is a double array storing the data read from some xlsx. 
	 */
	public final void readDataToArray(int row_start, int col_start, double[][] array) {
		
		Iterator<Row> itr = readIterator(); // 获得遍历行 的迭代器
		Row row = null; // 行对象
		int row_index = 0;// 行索引
		int col_index = 0;// 列索引
		int row_length = array.length; // 数据行数
		int col_length = array[0].length; // 数据列数
		
		// the first row is ommited for it stores column index
		if (itr.hasNext()) {
			itr.next();
		}
		
		// 定位行指针到 row_start
		while(itr.hasNext()){
			row_index++;
			if(row_index == row_start) {
				row_index = 0;
				break;
			}
			itr.next();
		}// 定位 over
		
		// other rows stores time series data
		while (itr.hasNext() && (row_index<row_length)) {
			col_index = 0;
			row = itr.next();
			Iterator<Cell> cellIterator = row.cellIterator(); // 遍历每行单元格的迭代器
			
			Cell cell = null; 
			// the first column is ommited for it stores row index
			if(cellIterator.hasNext()) { 
				cellIterator.next();
			}
			
			// 定位列指针到 col_start
			while(cellIterator.hasNext()) {
				col_index++;
				if(col_index == col_start) {
					col_index = 0;
					break;
				}
				cellIterator.next();
			}// 定位 over
			
			while (cellIterator.hasNext() && (col_index<col_length)) {
				cell = cellIterator.next();
				array[row_index][col_index++] = cell.getNumericCellValue();
			}// 一行数据读取完毕

			row_index++;			
		} // 数据行读取完毕
	}
	
	// read data from xlsx to array
	public final void readDataToArray(int row_start, int row_end) {
		
		Iterator<Row> itr = readIterator(); // 获得遍历行 的迭代器
		Row row = null; // 行对象
		int index = 0;// 行索引
		int row_length = row_end-row_start+1; // 数据行数
		
		// the first row is ommited for it stores column index
		if (itr.hasNext()) {
			row = itr.next();
		}
		
		// 定位行指针到 row_start
		while(itr.hasNext()){
			index++;
			if(index == row_start) {
				break;
			}
			row = itr.next();
		}
		
		index -= row_start;
		// other rows stores time series data
		while (itr.hasNext() && (index!=row_length)) {
			int j = 0;
			row = itr.next();
			Iterator<Cell> cellIterator = row.cellIterator(); // 遍历每行单元格的迭代器
			
			Cell cell = null; 
			// the first column is ommited for it stores row index
			if(cellIterator.hasNext())
				cell = cellIterator.next();

			while (cellIterator.hasNext()) {
				cell = cellIterator.next();
				ClusterData.items[index][j++] = cell.getNumericCellValue();  
			}// 一行数据读取完毕

			index++;			
		} // 数据行读取完毕
	}

	// 获取数据行 迭代器
	final Iterator<Row> readIterator() {
		Iterator<Row> itr = null;
		try {
			File excel = new File(this.dataPath);
			FileInputStream fis = new FileInputStream(excel);
			// 创建工作簿
			XSSFWorkbook book = new XSSFWorkbook(fis);
			// 创建工作簿下的第一页纸张
			XSSFSheet sheet = book.getSheetAt(0);

			// 纸张的迭代器，用于遍历行
			itr = sheet.iterator();
			// Iterating over Excel file in Java

		} catch (Exception e) {
			// TODO: handle exception
			e.printStackTrace();
		}
		return itr;
	}
}

秒客网

java转换文本文件到xlsx（自制缓冲区，无需先验文件行数）

相关文章