这节主要是讲解jsoup解析HTML页面。由于在android开发过程中,不可避免的涉及到web页面的抓取,解析,展示等等,所以,在这里我主要展示下利用jsoup jar包来抓取cnbeta.com网站的话题分类的实例。
下面是主要的代码,由于使用及其简单,我这里就不再多说了:
Codepackage com.android.web; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.http.util.ByteArrayBuffer; import org.apache.http.util.EncodingUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import android.app.Activity; import android.os.Bundle; import android.view.View; import android.view.View.OnClickListener; import android.widget.ListView; import android.widget.SimpleAdapter; public class _GetWebResoureActivity extends Activity { Document doc; @Override public void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.main); findViewById(R.id.button1).setOnClickListener(new OnClickListener() { @Override public void onClick(View v) { load(); } }); } protected void load() { try { doc = Jsoup.parse(new URL("http://www.cnbeta.com"), 5000); } catch (MalformedURLException e1) { e1.printStackTrace(); } catch (IOException e1) { e1.printStackTrace(); } List<Map<String, String>> list = new ArrayList<Map<String, String>>(); Elements es = doc.getElementsByClass("main_navi"); for (Element e : es) { Map<String, String> map = new HashMap<String, String>(); map.put("title", e.getElementsByTag("a").text()); map.put("href", "http://www.cnbeta.com" + e.getElementsByTag("a").attr("href")); list.add(map); } ListView listView = (ListView) findViewById(R.id.listView1); listView.setAdapter(new SimpleAdapter(this, list, android.R.layout.simple_list_item_2, new String[] { "title","href" }, new int[] { android.R.id.text1,android.R.id.text2 })); } /** * @param urlString * @return */ public String getHtmlString(String urlString) { try { URL url = null; url = new URL(urlString); URLConnection ucon = null; ucon = url.openConnection(); InputStream instr = null; instr = ucon.getInputStream(); BufferedInputStream bis = new BufferedInputStream(instr); ByteArrayBuffer baf = new ByteArrayBuffer(500); int current = 0; while ((current = bis.read()) != -1) { baf.append((byte) current); } return EncodingUtils.getString(baf.toByteArray(), "gbk"); } catch (Exception e) { return ""; } } }
注意代码中黄色标注部分,一定要找对位置,才能得到正确的结果。下面就是主要的预览效果: