java代码:
import java.io.File;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
public class parser1 {
public static void main(String[] args) {
File file = new File("E:\\STOCK\\html\\2.html");
try {
Parser parser = new Parser(file.getAbsolutePath());
parser.setEncoding("UTF-8");
NodeFilter filter= new AndFilter(new TagNameFilter("div"),
new HasAttributeFilter("class", "search_feed"));
NodeList nodeList = parser.extractAllNodesThatMatch(filter);
System.out.println("flter3大小="+nodeList.size());
} catch (Throwable e) {
e.printStackTrace();
}
}
}
2.html内容如下:
<script>STK && STK.pageletM && STK.pageletM.view({"pid":"pl_weibo_feedlist","js":["apps\/search\/js\/pl\/weibo\/feedList.js?version=201312061633"],"css":["appstyle\/searchV45\/css\/pl\/pl_feed.css?version=201312061633","appstyle\/searchV45\/css\/pl\/pl_page.css?version=201312061633","appstyle\/searchV45\/css\/pl\/pl_shortlink.css?version=201312061633","appstyle\/searchV45\/css\/pl\/pl_prizeper.css?version=201312061633"],"html":" <div class=\"search_feed\">\n <div class=\"feed_lists W_linka W_texta\" node-type=\"feed_list\">\n <a node-type=\"feed_list_newBar\" href=\"javascript:void(0);\" style=\"display:none\" class=\"notes\" suda-data=\"key=tblog_search_v4.1&value=weibo_new\">\u6709 20 \u6761\u65b0\u5fae\u535a\uff0c\u70b9\u51fb\u67e5\u770b<\/a>\n \n<dl class=\"feed_list\" mid=\"3652848963153960\" action-type=\"feed_list_item\" isforward=\"1\">\n <dt class=\"face\">\n <a href=\"http:\/\/weibo.com\/ethangaostudio\" title=\"\u9ad8\u627f\u661f\" target=\"_blank\" suda-data=\"key=tblog_search_v4.1&value=weibo_feed_1:2004370851\">\n <img src=\"http:\/\/tp4.sinaimg.cn\/2004370851\/50\/5659234133\/1\" alt=\"\u9ad8\u627f\u661f\" width=\"50\" height=\"50\" usercard=\"id=2004370851&usercardkey=weibo_mp\"\/>\n <\/a>\n <\/dt>\n <dd class=\"content\">\n <p node-type=\"feed_list_content\">\n <a nick-name=\"\u9ad8\u627f\u661f\" href=\"http:\/\/weibo.com\/ethangaostudio\" target=\"_blank\" title=\"\u9ad8\u627f\u661f\" usercard=\"id=2004370851&usercardkey=weibo_mp\" suda-data=\"key=tblog_search_v4.1&value=weibo_feed_1:2004370851\">\u9ad8\u627f\u661f<a href=\"http:\/\/vip.weibo.com\/personal?from=search\" target=\"_blank\"><img class=\"ico_member\" alt=\"\u5fae\u535a\u4f1a\u5458\" title=\"\u5fae\u535a\u4f1a\u5458\" src=\"http:\/\/img.t.sinajs.cn\/t4\/style\/images\/common\/transparent.gif\"><\/a><\/a>\uff1a<em>\u8fd9\u4e2a0.<span style=\"color:red;\">000001<\/span>btc\u53bb\u8c01\u94b1\u5305\u4e86\uff1f \/\/<a href=\"http:\/\/weibo.com\/n\/Yanxi-Com\" usercard=\"name=Yanxi-Com\" >@Yanxi-Com<\/a>:\u56de\u590d<a href=\"http:\/\/weibo.com\/n\/BTC123_%E6%AF%94%E7%89%B9%E5%B8%81%E5%AF%BC%E8%88%AA\" usercard=\"name=BTC123_\u6bd4\u7279\u5e01\u5bfc\u822a\" >@BTC123_\u6bd4\u7279\u5e01\u5bfc\u822a<\/a>:\u4ea4\u6613\u6240\u7b49\u673a\u6784\u5982\u679c\u5c06\u7528\u6237\u5145\u503c\u7684\u6563\u94b1\u8f6c\u5230\u540c\u4e00\u4e2a\u5730\u5740\u4e0a\uff0c\u5c82\u4e0d\u662f\u8981\u82b1\u6389\u5927\u91cf\u652f\u4ed8\u6210\u672c\uff08\u4e00\u822c\u6bcf\u7b140.0001BTC\uff09\u3002\u800c\u8fd9\u4e2a\u6210\u672c\u662f\u5b8c\u5168\u6ca1\u5fc5\u8981\u7684\uff0c\u6211\u89c9\u5f97\u4ed6\u4eec\u4e0d\u4f1a\u8fd9\u4e48\u5e72\u3002<\/em>\n <\/p>\n <\/div>\n<\/div>\n"})</script>
2 个解决方案
#1
你这是js函数啊。
#2
嗯嗯,后来又知道了,谢了
#1
你这是js函数啊。
#2
嗯嗯,后来又知道了,谢了