// split
//
// Time: 2021-04-29 23:51:50
 import java.io.IOException;

 import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; /**
* Parses the page content of a CNKI (知网) article detail page.
*
*/
public class Kns50onepage {
public static void main(String[] args) throws IOException { String url="http://www.cfed.cnki.net/kns50/detail.aspx?filename=GLXB201301003&dbname=CFJD2013&filetitle=%E7%9B%8A%E7%BB%84%E7%BB%87%E8%A1%8C%E4%B8%BA%E4%B8%8E%E6%8D%9F%E7%BB%84%E7%BB%87%E8%A1%8C%E4%B8%BA%3a%E4%B8%AD%E5%9B%BD%E7%89%B9%E5%BE%81%E7%9A%84%E8%A7%92%E8%89%B2%E5%A4%96%E8%A1%8C%E4%B8%BA%E6%A8%A1%E5%9E%8B%E5%8F%8A%E5%85%B6%E7%BB%8F%E9%AA%8C%E5%AE%9E%E8%AF%81";
getContentByJsoup(url);
//getLinksByJsoup(divContent); }
public static void getContentByJsoup(String url){
//解析整个网页
String content="";
try {
Document doc=Jsoup.connect(url)
.data("jquery", "java")
.userAgent("Mozilla")
.cookie("auth", "token")
.timeout(50000)
.get(); content=doc.toString();
} catch (IOException e) {
e.printStackTrace();
} Document doc=Jsoup.parse(content); String title=doc.select("span.datatitle").get(1).text();
System.out.println("标题:"+title); String author=doc.select("td").text().split("【作者】")[1].split("【")[0];
System.out.println("作者:"+author); String summary=doc.select("td").text().split("【中文摘要】")[1].split("【")[0];
System.out.println("中文摘要:"+summary); } }