XML解析错误:未组织好
位置:file:///C:/Users/long/DataScraperWorks/test/ddmk_weibo_fenxishi_list_16944905_3098730807.xml
行:690,列:69:
在Myeclipse中运行,输出为:
/**
 * Checks every {@code .xml} file in a fixed directory by parsing it with
 * dom4j's {@code SAXReader}. For each file the name is printed, followed by
 * either {@code Load success!} or the message of the exception raised while
 * reading it.
 */
public class xmlValidate {
    /**
     * Parses each XML file of the directory in turn.
     *
     * <p>Every file is read inside its own try/catch, so a file that cannot
     * be parsed is reported and the remaining files are still checked.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String directory = "C:/Users/long/DataScraperWorks/test"; /* folder that holds the xml files */
        String fileSuffix = ".xml"; /* only files with this suffix are read */
        File file = new File(directory);
        File[] filelist = file.listFiles(); /* entries of the folder */
        if (filelist == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            System.out.println("Cannot list directory: " + directory);
            return;
        }
        for (int i = 0; i < filelist.length; i++) {
            String filename = filelist[i].getName(); /* name of the current entry */
            // endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
            if (!filename.endsWith(fileSuffix)) {
                continue;
            }
            System.out.println(filename);
            try {
                SAXReader reader = new SAXReader();
                // The returned Document is not needed; reading only checks that the file parses.
                reader.read(directory + "/" + filename);
                System.out.println("Load success!");
            } catch (Exception e) {
                // Report this file and carry on with the next one.
                System.out.println(e.getMessage());
            }
        }
    }
}
输出结果:
ddmk_weibo_fenxishi_list_16944893_3098555023.xml
Load success!
ddmk_weibo_fenxishi_list_16944899_3106567492.xml
Load success!
ddmk_weibo_fenxishi_list_16944903_3098714823.xml
Load success!
ddmk_weibo_fenxishi_list_16944904_3098704145.xml
Load success!
ddmk_weibo_fenxishi_list_16944905_3098730807.xml
Error on line 690 of document file:///C:/Users/long/DataScraperWorks/test/ddmk_weibo_fenxishi_list_16944905_3098730807.xml : An invalid XML character (Unicode: 0xc) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0xc) was found in the element content of the document.
我想咨询下大家,有没有方法实现:当某个xml文件无法读取时,就将该xml文件删除。
12 个解决方案
#1
捕获解析出错的异常,然后做删除处理。
#2
catch (Exception e){
System.out.println(e.getMessage());
}
catch里只是输出了异常信息,那应该在哪一步做判断,然后进行删除处理呢?
#3
catch (Exception e){
System.out.println(e.getMessage());
// do delete op
}
System.out.println(e.getMessage());
// do delete op
}
#4
catch语句中写删除函数?
。。。。。。
#5
有什么问题?
#6
你用dom4j读取,如果读取出错,就知道是哪个节点有问题了,再到catch{}中做相应的处理
#7
如下代码所示,我希望分析test文件夹下的xml,现在有5个xml文件,执行下面代码后,结果如下。
package myproject;
import java.io.File;
import org.dom4j.Document;
import org.dom4j.io.SAXReader;
/**
 * Checks every {@code .xml} file in a fixed directory by parsing it with
 * dom4j's {@code SAXReader}. For each file the name is printed, followed by
 * either {@code Load success!} or the message of the exception raised while
 * reading it.
 */
public class xmlValidate {
    /**
     * Parses each XML file of the directory in turn.
     *
     * <p>Every file is read inside its own try/catch, so a file that cannot
     * be parsed is reported and the remaining files are still checked.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String directory = "C:/Users/long/DataScraperWorks/test"; /* folder that holds the xml files */
        String fileSuffix = ".xml"; /* only files with this suffix are read */
        File file = new File(directory);
        File[] filelist = file.listFiles(); /* entries of the folder */
        if (filelist == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            System.out.println("Cannot list directory: " + directory);
            return;
        }
        for (int i = 0; i < filelist.length; i++) {
            String filename = filelist[i].getName(); /* name of the current entry */
            // endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
            if (!filename.endsWith(fileSuffix)) {
                continue;
            }
            System.out.println(filename);
            try {
                SAXReader reader = new SAXReader();
                // The returned Document is not needed; reading only checks that the file parses.
                reader.read(directory + "/" + filename);
                System.out.println("Load success!");
            } catch (Exception e) {
                // Report this file and carry on with the next one.
                System.out.println(e.getMessage());
            }
        }
    }
}
输出结果:
ddmk_weibo_exception_16970809_3426606620.xml
Load success!
ddmk_weibo_fenxishi_list_16944899_3106567492.xml
Load success!
ddmk_weibo_fenxishi_list_16944903_3098714823.xml
Load success!
ddmk_weibo_fenxishi_list_16944904_3098704145.xml
Load success!
ddmk_weibo_fenxishi_list_16944905_3098730807.xml
Error on line 690 of document file:///C:/Users/long/DataScraperWorks/test/ddmk_weibo_fenxishi_list_16944905_3098730807.xml : An invalid XML character (Unicode: 0xc) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0xc) was found in the element content of the document.
#8
文件ddmk_weibo_fenxishi_list_16944905_3098730807.xml,不能正确读取。在try中for循环,依次处理每个xml文件,用filelist[i]表示。那么我该怎么进行操作呢?
package myproject;
import java.io.File;
import org.dom4j.Document;
import org.dom4j.io.SAXReader;
/**
 * Checks every {@code .xml} file in a fixed directory by parsing it with
 * dom4j's {@code SAXReader}. For each file the name is printed, followed by
 * either {@code Load success!} or the message of the exception raised while
 * reading it.
 */
public class xmlValidate {
    /**
     * Parses each XML file of the directory in turn.
     *
     * <p>Every file is read inside its own try/catch, so a file that cannot
     * be parsed is reported and the remaining files are still checked.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String directory = "C:/Users/long/DataScraperWorks/test"; /* folder that holds the xml files */
        String fileSuffix = ".xml"; /* only files with this suffix are read */
        File file = new File(directory);
        File[] filelist = file.listFiles(); /* entries of the folder */
        if (filelist == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            System.out.println("Cannot list directory: " + directory);
            return;
        }
        for (int i = 0; i < filelist.length; i++) {
            String filename = filelist[i].getName(); /* name of the current entry */
            // endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
            if (!filename.endsWith(fileSuffix)) {
                continue;
            }
            System.out.println(filename);
            try {
                SAXReader reader = new SAXReader();
                // The returned Document is not needed; reading only checks that the file parses.
                reader.read(directory + "/" + filename);
                System.out.println("Load success!");
            } catch (Exception e) {
                // Report this file and carry on with the next one.
                System.out.println(e.getMessage());
            }
        }
    }
}
ddmk_weibo_exception_16970809_3426606620.xml
Load success!
ddmk_weibo_fenxishi_list_16944899_3106567492.xml
Load success!
ddmk_weibo_fenxishi_list_16944903_3098714823.xml
Load success!
ddmk_weibo_fenxishi_list_16944904_3098704145.xml
Load success!
ddmk_weibo_fenxishi_list_16944905_3098730807.xml
Error on line 690 of document file:///C:/Users/long/DataScraperWorks/test/ddmk_weibo_fenxishi_list_16944905_3098730807.xml : An invalid XML character (Unicode: 0xc) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0xc) was found in the element content of the document.
#9
// Parse one directory entry; if it cannot be parsed, delete it.
// endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
if (filelist[i].getName().endsWith(fileSuffix)) {
    String filename = filelist[i].getName(); /* name of the current xml file */
    System.out.println(filename);
    SAXReader reader = new SAXReader();
    boolean loaded = false;
    java.io.InputStream in = null;
    try {
        // Open the stream ourselves instead of handing the parser a path, so that
        // we control when the file is closed.
        in = new java.io.FileInputStream(filelist[i]);
        // The system id only labels the source (e.g. in error messages).
        reader.read(in, filelist[i].toURI().toString());
        loaded = true;
        // Printed only when parsing really succeeded.
        System.out.println("Load success!");
    } catch (Exception e) {
        System.out.println(e.getMessage());
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing fails; deletion is still attempted below.
            }
        }
    }
    // Delete only after the stream is closed. NOTE(review): the delete() inside the
    // catch block presumably failed because the file was still open at that point,
    // which blocks deletion on Windows - confirm on the target machine.
    if (!loaded && !filelist[i].delete()) {
        System.out.println("Failed to delete " + filename);
    }
}
#10
在catch里执行delete操作没有成功,该文件还在。在try里执行filelist[i].delete();就能删除文件。
请问这是怎么回事呢?
#11
没道理的么
#12
我也觉得疑惑啊~~~~~~~~~~~~~~
#1
捕获解析出错的异常,然后做删除处理。
#2
catch (Exception e){
System.out.println(e.getMessage());
}
catch里只是输出了异常信息,那应该在哪一步做判断,然后进行删除处理呢?
#3
catch (Exception e){
System.out.println(e.getMessage());
// do delete op
}
System.out.println(e.getMessage());
// do delete op
}
#4
catch语句中写删除函数?
。。。。。。
#5
有什么问题?
#6
你用dom4j读取,如果读取出错,就知道是哪个节点有问题了,再到catch{}中做相应的处理
#7
如下代码所示,我希望分析test文件夹下的xml,现在有5个xml文件,执行下面代码后,结果如下。
package myproject;
import java.io.File;
import org.dom4j.Document;
import org.dom4j.io.SAXReader;
/**
 * Checks every {@code .xml} file in a fixed directory by parsing it with
 * dom4j's {@code SAXReader}. For each file the name is printed, followed by
 * either {@code Load success!} or the message of the exception raised while
 * reading it.
 */
public class xmlValidate {
    /**
     * Parses each XML file of the directory in turn.
     *
     * <p>Every file is read inside its own try/catch, so a file that cannot
     * be parsed is reported and the remaining files are still checked.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String directory = "C:/Users/long/DataScraperWorks/test"; /* folder that holds the xml files */
        String fileSuffix = ".xml"; /* only files with this suffix are read */
        File file = new File(directory);
        File[] filelist = file.listFiles(); /* entries of the folder */
        if (filelist == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            System.out.println("Cannot list directory: " + directory);
            return;
        }
        for (int i = 0; i < filelist.length; i++) {
            String filename = filelist[i].getName(); /* name of the current entry */
            // endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
            if (!filename.endsWith(fileSuffix)) {
                continue;
            }
            System.out.println(filename);
            try {
                SAXReader reader = new SAXReader();
                // The returned Document is not needed; reading only checks that the file parses.
                reader.read(directory + "/" + filename);
                System.out.println("Load success!");
            } catch (Exception e) {
                // Report this file and carry on with the next one.
                System.out.println(e.getMessage());
            }
        }
    }
}
输出结果:
ddmk_weibo_exception_16970809_3426606620.xml
Load success!
ddmk_weibo_fenxishi_list_16944899_3106567492.xml
Load success!
ddmk_weibo_fenxishi_list_16944903_3098714823.xml
Load success!
ddmk_weibo_fenxishi_list_16944904_3098704145.xml
Load success!
ddmk_weibo_fenxishi_list_16944905_3098730807.xml
Error on line 690 of document file:///C:/Users/long/DataScraperWorks/test/ddmk_weibo_fenxishi_list_16944905_3098730807.xml : An invalid XML character (Unicode: 0xc) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0xc) was found in the element content of the document.
#8
文件ddmk_weibo_fenxishi_list_16944905_3098730807.xml,不能正确读取。在try中for循环,依次处理每个xml文件,用filelist[i]表示。那么我该怎么进行操作呢?
package myproject;
import java.io.File;
import org.dom4j.Document;
import org.dom4j.io.SAXReader;
/**
 * Checks every {@code .xml} file in a fixed directory by parsing it with
 * dom4j's {@code SAXReader}. For each file the name is printed, followed by
 * either {@code Load success!} or the message of the exception raised while
 * reading it.
 */
public class xmlValidate {
    /**
     * Parses each XML file of the directory in turn.
     *
     * <p>Every file is read inside its own try/catch, so a file that cannot
     * be parsed is reported and the remaining files are still checked.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String directory = "C:/Users/long/DataScraperWorks/test"; /* folder that holds the xml files */
        String fileSuffix = ".xml"; /* only files with this suffix are read */
        File file = new File(directory);
        File[] filelist = file.listFiles(); /* entries of the folder */
        if (filelist == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            System.out.println("Cannot list directory: " + directory);
            return;
        }
        for (int i = 0; i < filelist.length; i++) {
            String filename = filelist[i].getName(); /* name of the current entry */
            // endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
            if (!filename.endsWith(fileSuffix)) {
                continue;
            }
            System.out.println(filename);
            try {
                SAXReader reader = new SAXReader();
                // The returned Document is not needed; reading only checks that the file parses.
                reader.read(directory + "/" + filename);
                System.out.println("Load success!");
            } catch (Exception e) {
                // Report this file and carry on with the next one.
                System.out.println(e.getMessage());
            }
        }
    }
}
ddmk_weibo_exception_16970809_3426606620.xml
Load success!
ddmk_weibo_fenxishi_list_16944899_3106567492.xml
Load success!
ddmk_weibo_fenxishi_list_16944903_3098714823.xml
Load success!
ddmk_weibo_fenxishi_list_16944904_3098704145.xml
Load success!
ddmk_weibo_fenxishi_list_16944905_3098730807.xml
Error on line 690 of document file:///C:/Users/long/DataScraperWorks/test/ddmk_weibo_fenxishi_list_16944905_3098730807.xml : An invalid XML character (Unicode: 0xc) was found in the element content of the document. Nested exception: An invalid XML character (Unicode: 0xc) was found in the element content of the document.
#9
// Parse one directory entry; if it cannot be parsed, delete it.
// endsWith rather than lastIndexOf: "a.xml.bak" contains ".xml" but is not an xml file.
if (filelist[i].getName().endsWith(fileSuffix)) {
    String filename = filelist[i].getName(); /* name of the current xml file */
    System.out.println(filename);
    SAXReader reader = new SAXReader();
    boolean loaded = false;
    java.io.InputStream in = null;
    try {
        // Open the stream ourselves instead of handing the parser a path, so that
        // we control when the file is closed.
        in = new java.io.FileInputStream(filelist[i]);
        // The system id only labels the source (e.g. in error messages).
        reader.read(in, filelist[i].toURI().toString());
        loaded = true;
        // Printed only when parsing really succeeded.
        System.out.println("Load success!");
    } catch (Exception e) {
        System.out.println(e.getMessage());
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing fails; deletion is still attempted below.
            }
        }
    }
    // Delete only after the stream is closed. NOTE(review): the delete() inside the
    // catch block presumably failed because the file was still open at that point,
    // which blocks deletion on Windows - confirm on the target machine.
    if (!loaded && !filelist[i].delete()) {
        System.out.println("Failed to delete " + filename);
    }
}
#10
在catch里执行delete操作没有成功,该文件还在。在try里执行filelist[i].delete();就能删除文件。
请问这是怎么回事呢?
#11
没道理的么
#12
我也觉得疑惑啊~~~~~~~~~~~~~~