json文件大小为10G左右。对于这种超大的json,使用jackson的ObjectMapper一次性映射到java对象是不太可行的,内存一般都会溢出。此时需要使用jackson的JsonParser从文件中逐个token去读取。一般大json中都会存在某个数组含有超多的数据记录,我们需要解决的就是记录当前token的路径:在读取到超大json数组时,再利用JsonParser逐条读取数据,用数组缓存一定量的数据并写入数据库后继续读取,直到json数组数据读取结束。
这里的关键是记录当前json中token的路径,以便利用JsonParser读取任意的子节点数据。
import ;
import ;
import ;
import ;
import ;
import ;
import ;
import ;
/**
 * Walks a JSON file token by token with a {@code JsonParser} while tracking the
 * current token path in a {@code JsonPath}, so that the elements of one specific
 * array can be read one at a time instead of mapping the whole file at once.
 *
 * NOTE(review): many call receivers and qualified names are missing from this
 * listing (for example the right-hand side of {@code long start}), so it does
 * not compile as shown. Comments below describe the visible structure only and
 * mark guesses about the missing parts as assumptions to confirm.
 */
public class Main {
/**
 * Entry point: builds the mapper, factory, parser and path tracker, then calls
 * {@link #traverse} with depth 0 for as long as the loop condition is non-null.
 *
 * @param args command-line arguments; not used in the visible code
 */
public static void main(String [] args){
// presumably the start timestamp in milliseconds (it is later divided by 1000) - TODO confirm
long start = ();
try {
ObjectMapper mapper = new ObjectMapper();
JsonFactory jsonFactory = new JsonFactory();
// presumably created from jsonFactory; the input file path is hard-coded below
JsonParser jsonParser = (
new File("E:/"));
JsonPath path = new JsonPath();
// presumably advances the parser by one token per iteration until input ends - TODO confirm
while (() != null) {
traverse(jsonParser, 0,path,mapper);
}
// NOTE(review): receiver missing; presumably closes the parser - confirm
();
// presumably prints the elapsed time in whole seconds
((()-start)/1000+"秒");
} catch (IOException e) {
// NOTE(review): receiver missing; presumably reports the exception - confirm
();
}
}
/**
 * Handles the current token: updates the tracked path, drains the target array
 * when the path comparison matches, and recurses into nested objects and arrays.
 *
 * @param jsonParser parser positioned on the token to handle
 * @param depth nesting level of this call; 0 when called from {@code main},
 *              incremented by one on every recursive call
 * @param path path tracker updated through {@link #setPath}
 * @param mapper passed on to recursive calls; presumably also used to read each
 *               array element as a tree (the receiver of that call is missing)
 * @throws IOException declared for the parser and mapper calls made here
 */
private static void traverse(JsonParser jsonParser, int depth, JsonPath path, ObjectMapper mapper) throws IOException {
// presumably the parser's current token - TODO confirm
JsonToken token = ();
if (token == null) {
return;
}
setPath(jsonParser,depth,path);
// Compares a string (presumably the rendered path) with the hard-coded target location.
// NOTE(review): JsonPath.toString() contains "root" literals; confirm this literal can actually match.
if (().equals("[i].results[i]")) {
// Loops while the checked token is START_OBJECT, i.e. once per object element.
while (() == JsonToken.START_OBJECT) {
JsonNode jsonNode = (jsonParser);
// A batch of records could be accumulated here and then written to the database.
(());
}
// Re-reads the token after the element loop so the check below sees the new position.
token = ();
}
// Recursive call for nested structures
if (token == JsonToken.START_OBJECT || token == JsonToken.START_ARRAY) {
// Continues until the checked values are null, END_OBJECT or END_ARRAY.
while (() != null && () != JsonToken.END_OBJECT
&& () != JsonToken.END_ARRAY) {
traverse(jsonParser, depth + 1,path,mapper);
}
}
}
/**
 * Records the current token in the path; only one node is recorded per depth.
 *
 * For FIELD_NAME, START_ARRAY and START_OBJECT tokens this either updates the
 * node already present at this depth or creates a new node and adds it. Other
 * token types fall through the switch without any action.
 *
 * @param jsonParser parser positioned on the token to record
 * @param depth nesting level of the token, as supplied by {@code traverse}
 * @param path path tracker to update
 * @throws IOException declared for the parser calls made here
 */
private static void setPath(JsonParser jsonParser, int depth, JsonPath path) throws IOException {
// presumably the parser's current token - TODO confirm
JsonToken token = ();
// Clears a range starting at index depth+1; presumably drops path entries deeper than this level.
if (() > depth+1) {
(depth+1,()).clear();
}
// Existing entry for this depth, or null when the checked size is below depth+1.
prefixNode = ()>=(depth+1)?(depth):null;
switch (token) {
case FIELD_NAME:
if (prefixNode != null) {
// Reuse the existing entry: presumably overwrites its type and name - TODO confirm
= JsonPath.NODE_FIELD;
= ();
} else {
// No entry at this depth yet: create a field node and add it.
node = new ();
= JsonPath.NODE_FIELD;
= ();
(node);
}
break;
case START_ARRAY:
if (prefixNode != null && JsonPath.NODE_FIELD.equals()) {
// Array is the value of a field: appends the "[i]" marker to a string (presumably the field's name).
= +"[i]";
} else {
// Otherwise a separate array node is created and added.
node = new ();
= JsonPath.NODE_ARRAY;
(node);
}
break;
case START_OBJECT:
if (prefixNode != null && JsonPath.NODE_FIELD.equals()) {
// Object is the value of a field: appends "." to a string (presumably the field's name).
= +".";
} else {
// Otherwise a separate object node is created and added.
node = new ();
= JsonPath.NODE_OBJECT;
(node);
}
break;
}
}
}
import ;
/**
 * Holds a list of {@link Node} entries and renders them as a path string.
 *
 * NOTE(review): several receivers and field accesses are missing from this
 * listing (for example the operands of the null/empty check in
 * {@code toString}), so it does not compile as shown. Comments mark guesses
 * about the missing parts as assumptions to confirm.
 */
public class JsonPath {
// Node type tags compared against in toString().
// NOTE(review): these are public, static and not final, so they can be reassigned.
public static String NODE_ARRAY = "array";
public static String NODE_OBJECT = "object";
public static String NODE_FIELD = "field";
// Ordered list of path entries; publicly accessible and mutable.
public LinkedList<Node> nodeLink = new LinkedList<>();
/** One path entry: a name plus a type tag (the type tags are the NODE_* strings above). */
public static class Node{
public String nodeName;
public String nodeType;
}
/**
 * Renders the path as a string.
 *
 * @return the empty string when the checked value is null or empty; otherwise
 *         the result built by the loop below
 */
@Override
public String toString() {
// presumably checks nodeLink - TODO confirm
if ( == null || ()) {
return "";
}
StringBuilder path = new StringBuilder();
for (int i = 0; i < (); i++ ) {
Node node = (i);
// Null entries are skipped.
if (node == null) {
continue;
}
// The first entry additionally emits a "root" literal ("root[i]" for an array).
// NOTE(review): if these calls append to the builder, every non-empty path starts
// with "root", and the array branch below adds a second "[i]" - confirm intent.
if (i == 0) {
if (NODE_ARRAY.equals()) {
("root[i]");
} else {
("root");
}
}
// Per-entry output: "[i]" for an array, "." for an object,
// and presumably the node name for a field - TODO confirm.
if (NODE_ARRAY.equals()) {
("[i]");
}else if (NODE_OBJECT.equals()) {
(".");
} else if (NODE_FIELD.equals()) {
();
}
}
return ();
}
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="/POM/4.0.0"
xmlns:xsi="http:///2001/XMLSchema-instance"
xsi:schemaLocation="/POM/4.0.0 /xsd/maven-4.0.">
<!-- Maven build descriptor for the BigJsonDeal artifact. -->
<!-- NOTE(review): the namespace URLs above, the groupId values and the property element names below look truncated; restore them before use. -->
<modelVersion>4.0.0</modelVersion>
<groupId></groupId>
<artifactId>BigJsonDeal</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- NOTE(review): element names are missing; presumably compiler source level, compiler target level and source encoding. Confirm. -->
<properties>
<>19</>
<>19</>
<>UTF-8</>
</properties>
<dependencies>
<!-- /artifact//jackson-databind -->
<!-- The only declared dependency: jackson-databind 2.16.1. -->
<dependency>
<groupId></groupId>
<artifactId>jackson-databind</artifactId>
<version>2.16.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- jsonschema2pojo plugin: sourceDirectory points at src/main/resources/schema and targetProject at src/main/java. -->
<plugin>
<groupId>org.jsonschema2pojo</groupId>
<artifactId>jsonschema2pojo-maven-plugin</artifactId>
<version>1.2.1</version>
<configuration>
<sourceDirectory>${basedir}/src/main/resources/schema</sourceDirectory>
<targetProject>${basedir}/src/main/java</targetProject>
<!-- NOTE(review): targetPackage is empty here; confirm the intended package for the generated classes. -->
<targetPackage></targetPackage>
</configuration>
</plugin>
</plugins>
</build>
</project>
jsonschema2pojo是一个根据json结构生成java pojo代码的工具,为复杂json生成pojo时十分省时间。