Jsoup学习文档
1 maven依赖
<dependency>
<!-- jsoup HTML parser library @ https://jsoup.org/ -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.2</version>
</dependency>
2 API文档地址
http://www.open-open.com/jsoup/
3 实际操作 demo实例
解析内涵段子文章
http://neihanshequ.com/p90768347610/
Html源码解析代码(Java)
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Demo that parses HTML with jsoup: fetches one article page and prints selected fields
 * (author link, avatar, author name, creation time, text, top comment and counters).
 *
 * <p>The selectors and element indices are hard-coded for the markup of the target page.
 * When an expected element is absent, the corresponding field is printed as an empty string
 * instead of aborting the run with an {@link IndexOutOfBoundsException}.
 */
public class JsoupZX {

    /** Address of the article page fetched by {@link #main(String[])}. */
    private static final String PAGE_URL = "http://neihanshequ.com/p90768347610";

    /**
     * Fetches {@link #PAGE_URL}, narrows the document down to the {@code detail-wrapper}
     * section and prints each extracted field on its own line to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final Document page;
        try {
            // Connect to the URL and fetch the parsed Document.
            page = Jsoup.connect(PAGE_URL).get();
        } catch (IOException e) {
            // Report which page failed rather than only dumping a bare stack trace.
            System.err.println("Failed to fetch " + PAGE_URL + ": " + e);
            return;
        }

        // Elements carrying the CSS class "content" (selection is by class, not by id).
        Elements content = page.getElementsByClass("content");
        // The matched elements are serialized and re-parsed into a standalone document; the
        // indices used below are relative to that re-parsed document, so this step is kept.
        Document contentDoc = Jsoup.parse(content.toString());
        Elements detail = contentDoc.getElementsByClass("detail-wrapper");
        Document detailDoc = Jsoup.parse(detail.toString());

        // User info: href of the first link.
        String userInfo = attrAt(detailDoc.select("a"), 0, "href");
        System.out.println("userInfo:" + userInfo);

        // User avatar: data-src attribute of the second <img>.
        String imgUrl = attrAt(detailDoc.select("img"), 1, "data-src");
        System.out.println("imgUrl:" + imgUrl);

        // Author name: first element with class "name".
        Elements names = detailDoc.getElementsByClass("name");
        String author = textAt(names, 0);
        System.out.println("author:" + author);

        // Creation time.
        // NOTE(review): the argument contains a space (two class names); confirm that
        // getElementsByClass matches it as intended, or switch to select(".time.timeago").
        String creattime = detailDoc.getElementsByClass("time timeago").text();
        System.out.println("creattime:" + creattime);

        // Article text: first <p> inside an <h1>.
        String title = textAt(detailDoc.select("h1 p"), 0);
        System.out.println("title:" + title);

        // Second element with class "name", printed as "hotauthor"
        // (presumably the author of the top comment -- verify against the page markup).
        String hotauthor = textAt(names, 1);
        System.out.println("hotauthor = " + hotauthor);

        // Top comment: second <p> that has a <div> ancestor.
        String hotComment = textAt(detailDoc.select("div p"), 1);
        System.out.println("hotComment = " + hotComment);

        // Likes.
        String dig = textAt(detailDoc.getElementsByClass("digg"), 0);
        System.out.println("dig = " + dig);

        // Dislikes.
        String bury = detailDoc.getElementsByClass("bury").text();
        System.out.println("bury = " + bury);

        // Favorites.
        String repin = detailDoc.getElementsByClass("repin").text();
        System.out.println("repin = " + repin);

        // Comments.
        String comment = detailDoc.getElementsByClass("comment").text();
        System.out.println("comment = " + comment);

        // Shares.
        String share = detailDoc.getElementsByClass("share").text();
        System.out.println("share = " + share);
    }

    /**
     * Returns the text of the element at {@code index}, or an empty string when the list has
     * no element at that position.
     *
     * @param elements matched elements to read from
     * @param index zero-based position of the wanted element
     * @return the element's text, or {@code ""} if {@code index} is out of range
     */
    private static String textAt(Elements elements, int index) {
        if (index < 0 || index >= elements.size()) {
            return "";
        }
        Element element = elements.get(index);
        return element.text();
    }

    /**
     * Returns the value of attribute {@code key} on the element at {@code index}, or an empty
     * string when the list has no element at that position.
     *
     * @param elements matched elements to read from
     * @param index zero-based position of the wanted element
     * @param key attribute name to read
     * @return the attribute value, or {@code ""} if {@code index} is out of range
     */
    private static String attrAt(Elements elements, int index, String key) {
        if (index < 0 || index >= elements.size()) {
            return "";
        }
        Element element = elements.get(index);
        return element.attr(key);
    }
}