jsoup菜鸟练习

时间:2024-04-09 20:58:55

Jsoup学习文档

1 Maven 依赖

<dependency>
  <!-- jsoup HTML parser library @ https://jsoup.org/ -->
  <groupId>org.jsoup</groupId>
  <artifactId>jsoup</artifactId>
  <version>1.11.2</version>
</dependency>

2 API 地址

http://www.open-open.com/jsoup/

3 实际操作:Demo 实例

解析内涵段子文章

http://neihanshequ.com/p90768347610/

 jsoup菜鸟练习

HTML 源码

 jsoup菜鸟练习

 

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * 使用jsoup解析html
 *
 */
public class JsoupZX {

    public static void main(String[] args) {
        final String url="http://neihanshequ.com/p90768347610";
        try {
            Document document= Jsoup.connect(url).get();//连接网址,获取Document对象
            
Elements context=document.getElementsByClass("content");//获取div[id=context]的对象
            
Document contextDoc=Jsoup.parse(context.toString());
            Elements detail=contextDoc.getElementsByClass("detail-wrapper");
            Document detailDoc=Jsoup.parse(detail.toString());
            String userInfo = detailDoc.select("a").get(0).attr("href");//获取用户信息
            
System.out.println("userInfo:"+userInfo);
            String imgUrl= detailDoc.select("img").get(1).attr("data-src");//获取用户头像
            
System.out.println("imgUrl:"+imgUrl);
            String author=detailDoc.getElementsByClass("name").get(0).text();//获取作者名称
            
System.out.println("author:"+author);
            String creattime=detailDoc.getElementsByClass("time timeago").text();//获取作者创建时间
            
System.out.println("creattime:"+creattime);
            String title=detailDoc.select("h1 p").get(0).text();//获取段子内容
            
System.out.println("title:"+title);
            String hotauthor=detailDoc.getElementsByClass("name").get(1).text();//获取作者名称
            
System.out.println("hotauthor = " + hotauthor);
            String  hotComment=detailDoc.select("div p").get(1).text();//获取热评
            
System.out.println("hotComment = " + hotComment);
            String dig=detailDoc.getElementsByClass("digg").get(0).text();//点赞
            
System.out.println("dig = " + dig);
            String bury=detailDoc.getElementsByClass("bury").text();//踩
            
System.out.println("bury = " + bury);
            String repin=detailDoc.getElementsByClass("repin").text();//收藏
            
System.out.println("repin = " + repin);
            String comment = detailDoc.getElementsByClass("comment").text();//评论
            
System.out.println("comment = " + comment);
            String share = detailDoc.getElementsByClass("share").text();//转发
            
System.out.println("share = " + share);

        } catch (IOException e) {
            e.printStackTrace();
        }

    }
}