首页 > 代码库 > java 如何获取网页的动态内容,并解析网页内容
java 如何获取网页的动态内容,并解析网页内容
(笔记)
获取网页的动态内容参考
https://pastebin.com/raw/FePkm2kp
Maven:
<!-- Fetches page source, including dynamically generated content. -->
<!-- The implementation snippet below uses WebRequest, WebClient.setCssEnabled and
     WebClient.setAppletEnabled, which belong to the HtmlUnit 2.x API published under
     net.sourceforge.htmlunit; the legacy htmlunit:htmlunit:1.14 artifact does not provide them.
     NOTE(review): 2.10 is chosen as a 2.x release that still exposes these setters directly
     on WebClient - confirm against the version your project actually targets. -->
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.10</version>
</dependency>
实现:
WebClient webClient = new WebClient(); //don‘t disable it if you want JS ENABLED; webClient.setCssEnabled(false); webClient.setAppletEnabled(false); webClient.setJavaScriptEnabled(false); webClient.setTimeout(7000); WebRequest request = new WebRequest(new URL("http://portal.hud.gov/portal/page/portal/HUD/")); //request.setAdditionalHeader("Authorization", "Basic bG9hbnNkZXY6bG9AbnNkM3Y="); Page page = webClient.getPage(request); String contentType = page.getWebResponse().getContentType(); int statusCode = page.getWebResponse().getStatusCode(); String statusMessage = page.getWebResponse().getStatusMessage(); long loadTime = page.getWebResponse().getLoadTime(); System.out.println(statusCode); System.out.println(statusMessage); System.out.println(contentType); System.out.println(loadTime); System.out.println(page.getWebResponse().getContentAsString());
解析网页内容(将 HTML 表格转换为 JSON)参考
https://stackoverflow.com/questions/42446990/parse-html-table-to-json-using-jsoup-in-java
maven:
<!-- jsoup HTML parser; the implementation snippet below uses it to select table rows and cells
     from an HTML string and copy their text into JSON objects. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
实现:
public String TableToJson(String source) throws JSONException {
Document doc = Jsoup.parse(source);
JSONObject jsonParentObject = new JSONObject();
//JSONArray list = new JSONArray();
for (Element table : doc.select("table")) {
for (Element row : table.select("tr")) {
JSONObject jsonObject = new JSONObject();
Elements tds = row.select("td");
String Name = tds.get(0).text();
String Group = tds.get(1).text();
String Code = tds.get(2).text();
String Lesson = tds.get(3).text();
String Day1 = tds.get(4).text();
String Day2 = tds.get(5).text();
String Day3= tds.get(6).text();
jsonObject.put("Group", Group);
jsonObject.put("Code", Code);
jsonObject.put("Lesson", Lesson);
jsonObject.put("Day1", Day1);
jsonObject.put("Day2", Day2);
jsonObject.put("Day3", Day3);
jsonParentObject.put(Name,jsonObject);