Comma-Separated Values (CSV, sometimes called character-separated values, because the separator does not have to be a comma) is a plain-text format for storing tabular data (numbers and text). Plain text means the file is a sequence of characters and contains no data that must be interpreted as binary. A CSV file consists of any number of records separated by some kind of line break; each record consists of fields separated by some other character or string.
If you don't want to read through the step-by-step analysis and just want the code, jump straight to the end!!!
Delimiter:
The most common delimiters are the comma and the tab character, but since this project uses ︴, the examples below use that character; a raw record therefore looks like 1001︴ACME Bank︴2019-01-01 (sample values for illustration only).
Approach:
- Pass in the directory holding the .csv files, e.g. new File("C:\\Users\\brbai\\Desktop\\files")
- Recursively read every .csv file at every level under that directory
- Call insertData(File file, int type), which uses a buffered character stream to read the column names from the first line and then hands the stream to readCSV
- Convert the characters that were read as needed so that they match the database column types and can be inserted directly. For example: the first line holds the column names and is joined with ","; in the data lines, ordinary strings are wrapped in single quotes, timestamps are converted to the corresponding format, and so on (see the sketch after this list)
- readCSV(BufferedReader reader) reads and returns 1000 lines at a time, which are batch-inserted into the database; after a successful insert the next batch is read, and the buffered stream is closed once the .csv file has been fully read
- Files are processed one after another until every file has been read and written
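To make the conversion step concrete, here is a minimal sketch of what happens to one record before it reaches the mapper. The class name and sample field values are made up for illustration, and it uses a single combined regex where the readCSV method below performs two separate replacement passes.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LineConversionDemo {
    public static void main(String[] args) {
        // One raw ︴-delimited record: id, name, a date and a datetime (sample data)
        String line = "1001︴ACME Bank︴2019-01-01︴2019-01-01 12:12:06";

        // Step 1: quote every field -> '1001','ACME Bank','2019-01-01','2019-01-01 12:12:06'
        line = "'" + line.replaceAll("︴", "','") + "'";

        // Step 2: rewrite quoted date/datetime literals as Oracle to_date(...) expressions
        String dateRegex = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}(?:[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2})?[']";
        Matcher m = Pattern.compile(dateRegex).matcher(line);
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            m.appendReplacement(sb, Matcher.quoteReplacement(
                    "to_date(" + m.group() + ", 'YYYY-MM-DD HH24:MI:SS')"));
        }
        m.appendTail(sb);

        // Prints something like:
        // '1001','ACME Bank',to_date('2019-01-01', 'YYYY-MM-DD HH24:MI:SS'),to_date('2019-01-01 12:12:06', 'YYYY-MM-DD HH24:MI:SS')
        System.out.println(sb);
    }
}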
Walking the folder, reading every .csv file, and inserting into the database
/**
 * @Description: Walk the folder tree, read each file and insert it into the database
 * @Param: [f] the directory (folder)
 * @return: void
 */
public void getFileName(File f) {
    // Get the children of the current directory
    File[] file = f.listFiles();
    if (file == null) {
        return;
    }
    // Iterate over the File array of the current (parent) directory
    for (File file2 : file) {
        String fileName = file2.getName();
        if (!file2.isDirectory()) {
            long start = System.currentTimeMillis();
            if (fileName.contains("soa_bank")) {
                insertData(file2, SOA_BANK);
            } else if (fileName.contains("soa_account")) {
                insertData(file2, SOA_ACCOUNT);
            }
            log.info("File {} imported in {} ms", file2.getPath(), (System.currentTimeMillis() - start));
        }
        // If file2 is a directory, recurse into it; otherwise move on to the next entry
        if (file2.isDirectory() && !"today".equals(fileName)) {
            getFileName(file2);
        }
    }
}
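Assuming CSVUtils is picked up as a Spring bean (it is annotated with @Component in the complete class at the end), a caller only needs to hand getFileName the root directory. The runner class and method below are hypothetical and just illustrate the call; they assume CSVUtils is importable from its package.
import java.io.File;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

@Component
public class CsvImportRunner {

    @Autowired
    private CSVUtils csvUtils;

    public void runImport() {
        // The example directory from the approach above; every soa_bank/soa_account
        // .csv under it (at any depth) will be imported
        csvUtils.getFileName(new File("C:\\Users\\brbai\\Desktop\\files"));
    }
}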
Reading the CSV file
/**
 * @Description: Read data from the csv file into a list
 * @Param: [reader] buffered character stream
 * @return: the data that was read (at most 1000 lines per call)
 */
public List<String> readCSV(BufferedReader reader) {
    ArrayList<String> list = new ArrayList<String>();
    try {
        String line = null;
        for (int i = 0; i < 1000; i++) {
            line = reader.readLine();
            if (line == null) {
                break;
            }
            // Quote every field: a︴b︴c -> 'a','b','c'
            line = "'" + line.replaceAll("︴", "','") + "'";
            // Regex matching a date literal such as '2019-01-01'
            String regex1 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[']";
            Pattern pattern1 = Pattern.compile(regex1);
            Matcher m1 = pattern1.matcher(line);
            line = line.replaceAll(regex1, "︴");
            // Replace each date literal with an Oracle to_date(...) expression
            while (m1.find()) {
                line = line.replaceFirst("︴", " to_date ( " + m1.group() + ", 'YYYY-MM-DD HH24:MI:SS' )");
            }
            // Regex matching a datetime literal such as '2019-01-01 12:12:06'
            String regex2 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2}[']";
            Pattern pattern2 = Pattern.compile(regex2);
            Matcher m2 = pattern2.matcher(line);
            line = line.replaceAll(regex2, "︴");
            // Replace each datetime literal with an Oracle to_date(...) expression
            while (m2.find()) {
                line = line.replaceFirst("︴", " to_date ( " + m2.group() + ", 'YYYY-MM-DD HH24:MI:SS' )");
            }
            list.add(line);
        }
    } catch (Exception e) {
        log.error("Error while reading CSV data", e);
        try {
            reader.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
    // Return whatever has been read (possibly empty) so the caller's loop terminates instead of hitting a NullPointerException
    return list;
}
Batch inserting into the database
/**
 * @Description: Batch insert into the database
 * @Param: [file, type] file: the CSV file, type: the target database table
 * @return: void
 */
public void insertData(File file, int type) {
    String head = null;
    List<String> list = null;
    BufferedReader reader = null;
    // Read the header line containing the column names
    try {
        reader = new BufferedReader(new FileReader(file));
        head = reader.readLine();
        if (head == null) {
            log.info("File {} is empty!!!", file.getPath());
            return;
        } else {
            head = head.replaceAll("︴", ",");
        }
        while ((list = readCSV(reader)).size() != 0) {
            switch (type) {
                case SOA_BANK: {
                    bankInfoMapper.insertListData(head, list);
                }
                break;
                case SOA_ACCOUNT: {
                    accountMapper.insertListData(head, list);
                }
                break;
            }
            log.info("File {} importing --- +{} rows", file.getPath(), list.size());
        }
    } catch (IOException e) {
        log.error("***Exception while importing file {} into the database***", file.getPath(), e);
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Mapper interface (AccountMapper as an example)
public interface AccountMapper extends BaseMapper<Account> {
    void insertListData(@Param("head") String head, @Param("list") List<String> list);
}
Batch insert (Oracle database) (the XML file for the Mapper interface, as an example)
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="">
    <insert id="insertListData" useGeneratedKeys="false">
        insert into T_ACCOUNT ( ${head} )
        <foreach collection="list" separator="UNION ALL" item="item">
            (
            SELECT
            ${item}
            FROM DUAL
            )
        </foreach>
    </insert>
</mapper>
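For each batch, the foreach above concatenates the rows returned by readCSV into a single Oracle multi-row insert of the form INSERT INTO T_ACCOUNT (${head}) SELECT <row 1 values> FROM DUAL UNION ALL SELECT <row 2 values> FROM DUAL ..., which is why every line produced by readCSV must already be a comma-separated list of quoted values and to_date(...) expressions, and why inserting in batches of 1000 keeps each generated statement at a manageable size. Note that ${} substitutes text directly into the SQL rather than binding parameters, so this pattern assumes the CSV contents are trusted.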
Reading the database table structure
When a timestamp in the CSV file is null, or the data does not contain every column of the table, you may need to query the attributes of each database column and handle the values accordingly. Only the reading of the table structure is shown here; the actual handling depends on your business requirements.
//Inject the SqlSessionFactory
@Autowired
private SqlSessionFactory sqlSessionFactory;
/**
 * @Description: Read the table structure from the database
 * @Param: [tableName]
 * @return: Map<tableName, Map<columnName, typeName>>
 */
public Map<String, Map<String, String>> getDatabaseAttribute(String tableName) {
    SqlSession sqlSession = sqlSessionFactory.openSession();
    Connection connection = sqlSession.getConnection();
    Map<String, Map<String, String>> database = new HashMap<>();
    DatabaseMetaData metaData = null;
    try {
        metaData = connection.getMetaData();
        ResultSet columns = metaData
                .getColumns(null, "DEVTEST", tableName, "%");
        Map<String, String> attributes = new HashMap<>();
        while (columns.next()) {
            attributes.put(columns.getString("COLUMN_NAME"), columns.getString("TYPE_NAME"));
        }
        database.put(tableName, attributes);
    } catch (SQLException e) {
        e.printStackTrace();
    } finally {
        try {
            connection.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
    return database;
}
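As one possible way to use the result (the per-column handling itself is business-specific), the hypothetical helper below logs which columns of a table are DATE columns; it assumes it lives in the same class as getDatabaseAttribute and that the class has an Slf4j logger like CSVUtils does.
/**
 * Hypothetical example: log which columns of the given table are DATE columns,
 * e.g. to decide whether a CSV value needs a to_date(...) wrapper or a NULL placeholder.
 */
public void logDateColumns(String tableName) {
    Map<String, Map<String, String>> structure = getDatabaseAttribute(tableName);
    Map<String, String> columns = structure.get(tableName);
    for (Map.Entry<String, String> column : columns.entrySet()) {
        if ("DATE".equals(column.getValue())) {
            log.info("Column {} of table {} is a DATE column", column.getKey(), tableName);
        }
    }
}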
The complete utility class CSVUtils
package com.xxx.spmacore.common.utils;
import com.xxx.spmacore.soa.mapper.AccountMapper;
import com.xxx.spmacore.soa.mapper.BankInfoMapper;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
* @author: brbai
* @create: 2019-07-09 11:29:41
* @description:
*/
@Component
@Slf4j
public class CSVUtils {
    @Autowired
    private BankInfoMapper bankInfoMapper;
    @Autowired
    private AccountMapper accountMapper;

    private static final int SOA_BANK = 1;
    private static final int SOA_ACCOUNT = 2;
    /**
     * @Description: Walk the folder tree, read each file and insert it into the database
     * @Param: [f] the directory (folder)
     * @return: void
     */
    public void getFileName(File f) {
        // Get the children of the current directory
        File[] file = f.listFiles();
        if (file == null) {
            return;
        }
        // Iterate over the File array of the current (parent) directory
        for (File file2 : file) {
            String fileName = file2.getName();
            if (!file2.isDirectory()) {
                long start = System.currentTimeMillis();
                if (fileName.contains("soa_bank")) {
                    insertData(file2, SOA_BANK);
                } else if (fileName.contains("soa_account")) {
                    insertData(file2, SOA_ACCOUNT);
                }
                log.info("File {} imported in {} ms", file2.getPath(), (System.currentTimeMillis() - start));
            }
            // If file2 is a directory, recurse into it; otherwise move on to the next entry
            if (file2.isDirectory()) {
                getFileName(file2);
            }
        }
    }
    /**
     * @Description: Read data from the csv file into a list
     * @Param: [reader] buffered character stream
     * @return: the data that was read (at most 1000 lines per call)
     */
    public List<String> readCSV(BufferedReader reader) {
        ArrayList<String> list = new ArrayList<String>();
        try {
            String line = null;
            for (int i = 0; i < 1000; i++) {
                line = reader.readLine();
                if (line == null) {
                    break;
                }
                // Quote every field: a︴b︴c -> 'a','b','c'
                line = "'" + line.replaceAll("︴", "','") + "'";
                // Regex matching a date literal such as '2019-01-01'
                String regex1 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[']";
                Pattern pattern1 = Pattern.compile(regex1);
                Matcher m1 = pattern1.matcher(line);
                line = line.replaceAll(regex1, "︴");
                // Replace each date literal with an Oracle to_date(...) expression
                while (m1.find()) {
                    line = line.replaceFirst("︴", " to_date ( " + m1.group() + ", 'YYYY-MM-DD HH24:MI:SS' )");
                }
                // Regex matching a datetime literal such as '2019-01-01 12:12:06'
                String regex2 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2}[']";
                Pattern pattern2 = Pattern.compile(regex2);
                Matcher m2 = pattern2.matcher(line);
                line = line.replaceAll(regex2, "︴");
                // Replace each datetime literal with an Oracle to_date(...) expression
                while (m2.find()) {
                    line = line.replaceFirst("︴", " to_date ( " + m2.group() + ", 'YYYY-MM-DD HH24:MI:SS' )");
                }
                list.add(line);
            }
        } catch (Exception e) {
            log.error("Error while reading CSV data", e);
            try {
                reader.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
        // Return whatever has been read (possibly empty) so the caller's loop terminates instead of hitting a NullPointerException
        return list;
    }
    /**
     * @Description: Batch insert into the database
     * @Param: [file, type] file: the CSV file, type: the target database table
     * @return: void
     */
    public void insertData(File file, int type) {
        String head = null;
        List<String> list = null;
        BufferedReader reader = null;
        // Read the header line containing the column names
        try {
            reader = new BufferedReader(new FileReader(file));
            head = reader.readLine();
            if (head == null) {
                log.info("File {} is empty!!!", file.getPath());
                return;
            } else {
                head = head.replaceAll("︴", ",");
            }
            while ((list = readCSV(reader)).size() != 0) {
                switch (type) {
                    case SOA_BANK: {
                        bankInfoMapper.insertListData(head, list);
                    }
                    break;
                    case SOA_ACCOUNT: {
                        accountMapper.insertListData(head, list);
                    }
                    break;
                }
                log.info("File {} importing --- +{} rows", file.getPath(), list.size());
            }
        } catch (IOException e) {
            log.error("***Exception while importing file {} into the database***", file.getPath(), e);
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}