一般的网页可以通过URL直接获取HTML代码:
public static void main(String[] args) throws Exception { //获取url URL url = new URL("https://www.baidu.com"); //下载资源 InputStream is = url.openStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is,"utf-8")); String msg = null; while (null != (msg = br.readLine())){ System.out.println(msg); } br.close(); }
但是有些网站不能这样直接访问,此时我们需要模拟浏览器(设置 User-Agent 请求头)再去获取代码:
public static void main(String[] args) throws Exception { //获取url URL url = new URL("https://www.dianping.com"); //下载资源 HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(),"utf-8")); String msg = null; while (null != (msg = br.readLine())){ System.out.println(msg); } br.close(); }