Java爬虫：使用Jsoup爬取龙腾网第一页信息

如果不太懂Jsoup，看看这里就会了：http://blog.csdn.net/disiwei1012/article/details/51614177

要爬取的网站：http://www.ltaaa.com

======================下面是正文=========================

先看下需要爬取的网页
java爬虫（Jsoup爬取龙腾网）第一页信息

先用 Firebug 看下龙腾网（http://www.ltaaa.com）的页面结构：
java爬虫（Jsoup爬取龙腾网）第一页信息

再看下我抓取后的：
java爬虫（Jsoup爬取龙腾网）第一页信息

实体：

/**
 * Mutable value holder for one scraped headline entry.
 *
 * <p>Every property has a plain getter and setter; no validation is performed
 * and object-typed properties may be {@code null}.
 */
public class News {

    // Link-related properties.
    private String href;
    private String imghref;

    // Textual properties.
    private String title;
    private String content;

    // Numeric identifier assigned by whoever creates the entry.
    private int id;

    /** Creates an entry with all properties at their Java default values. */
    public News() {
    }

    /**
     * Creates a fully populated entry.
     *
     * @param title   headline text
     * @param href    link associated with the headline
     * @param content body text
     * @param imghref image link associated with the headline
     * @param id      numeric identifier
     */
    public News(String title, String href, String content, String imghref, int id) {
        this.id = id;
        this.title = title;
        this.href = href;
        this.content = content;
        this.imghref = imghref;
    }

    /** @return the numeric identifier */
    public int getId() {
        return this.id;
    }

    /** @param id the numeric identifier to store */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the image link */
    public String getImghref() {
        return this.imghref;
    }

    /** @param imghref the image link to store */
    public void setImghref(String imghref) {
        this.imghref = imghref;
    }

    /** @return the headline text */
    public String getTitle() {
        return this.title;
    }

    /** @param title the headline text to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the headline link */
    public String getHref() {
        return this.href;
    }

    /** @param href the headline link to store */
    public void setHref(String href) {
        this.href = href;
    }

    /** @return the body text */
    public String getContent() {
        return this.content;
    }

    /** @param content the body text to store */
    public void setContent(String content) {
        this.content = content;
    }

}

代码

/**
 * Scrapes the headline list from the front page of http://www.ltaaa.com with
 * Jsoup and writes it as a simple HTML table to {@code d://longteng.html}.
 */
public class JsoupLong {

    /** Page that is fetched; also the fallback prefix for links that cannot be resolved. */
    private static final String SITE_URL = "http://www.ltaaa.com";

    /** Id of the element whose {@code <li>} children are the headline entries. */
    private static final String LIST_ELEMENT_ID = "ngro1";

    /** Location of the generated HTML report. */
    private static final String OUTPUT_PATH = "d://longteng.html";

    /**
     * Runs the scraper once.
     *
     * @param args ignored
     * @throws Exception if fetching the page or writing the report fails
     */
    public static void main(String[] args) throws Exception {
        getNews();
    }

    /**
     * Fetches the front page, extracts one {@link News} per headline entry and
     * writes the entries to the HTML report file.
     *
     * @return the extracted entries in page order, ids starting at 1; never {@code null}
     * @throws IllegalStateException if the page has no element with id {@code ngro1}
     * @throws Exception if the page cannot be fetched or the report cannot be written
     */
    static ArrayList<News> getNews() throws Exception {
        ArrayList<News> newsList = fetchNews();
        writeReport(newsList, new File(OUTPUT_PATH));
        return newsList;
    }

    /** Downloads the front page and converts each {@code <li>} that has a link into a News. */
    private static ArrayList<News> fetchNews() throws Exception {
        Document doc = Jsoup.connect(SITE_URL).get();
        Element container = doc.getElementById(LIST_ELEMENT_ID);
        if (container == null) {
            // Fail with a descriptive message instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "Element with id '" + LIST_ELEMENT_ID + "' not found at " + SITE_URL);
        }

        ArrayList<News> newsList = new ArrayList<News>();
        int nextId = 1;
        for (Element item : container.getElementsByTag("li")) {
            Element link = item.select("a").first();
            if (link == null) {
                // An entry without a link has neither title nor target; skip it.
                continue;
            }
            Element img = item.select("img").first();

            News news = new News();
            news.setImghref(img == null ? "" : img.attr("src"));
            news.setTitle(link.text());
            news.setHref(resolveHref(link));
            news.setId(nextId++);
            newsList.add(news);
        }
        return newsList;
    }

    /**
     * Resolves the link target against the document's base URI so that hrefs which
     * are already absolute, or relative without a leading slash, stay valid.
     * Falls back to plain concatenation with the site root when resolution fails.
     */
    private static String resolveHref(Element link) {
        String absolute = link.absUrl("href");
        if (absolute.length() == 0) {
            return SITE_URL + link.attr("href");
        }
        return absolute;
    }

    /** Writes the entries as an HTML table, encoded as UTF-8 to match the declared charset. */
    private static void writeReport(ArrayList<News> newsList, File file) throws Exception {
        java.io.Writer out = new java.io.BufferedWriter(
                new java.io.OutputStreamWriter(new java.io.FileOutputStream(file), "UTF-8"));
        try {
            out.write("<html><head><meta charset='UTF-8'><title>by dqf</title></head><body><center>");
            out.write("<br><br><br><br><table>");
            out.write("<tr style='background-color: gray'>");
            out.write("<td align='center'>id</td>");
            out.write("<td align='center'>国籍</td>");
            out.write("<td align='center'>标题</td>");
            out.write("</tr>");

            for (News news : newsList) {
                out.write("<tr>");
                out.write("<td>" + news.getId() + "</td>");
                String imgSrc = news.getImghref();
                if (imgSrc == null || imgSrc.length() == 0) {
                    // An empty src would make the browser re-request the page itself.
                    out.write("<td></td>");
                } else {
                    out.write("<td><img src='" + escapeHtml(imgSrc) + "'></td>");
                }
                out.write("<td><a href='" + escapeHtml(news.getHref()) + "'>"
                        + escapeHtml(news.getTitle()) + "</a></td>");
                out.write("</tr>");
            }

            out.write("</table></center></body></html>");
        } finally {
            // close() also flushes; the finally block releases the file on failure too.
            out.close();
        }
    }

    /** Escapes the characters that are significant in HTML text and quoted attribute values. */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    sb.append("&amp;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                case '\'':
                    sb.append("&#39;");
                    break;
                default:
                    sb.append(c);
            }
        }
        return sb.toString();
    }

}

秒客网

java爬虫（Jsoup爬取龙腾网）第一页信息

======================下面是正文=========================

相关文章