首页 > 代码库 > 简单爬虫 从指定地址下载网站内容
简单爬虫 从指定地址下载网站内容
Http01App.java
1. 使用了多线程、IO 流、net(网络包)
package main;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
/**
* Created by lxj-pc on 2017/6/27.
*/
/**
 * Simple crawler demo: downloads the HTML at a fixed URL on a worker thread
 * and saves it to a local file, printing download progress to the console.
 */
public class Http01App {
    public static void main(String[] args) {
        String url = "http://tuijian.hao123.com:80/index.html";
        // Start a worker thread that downloads the page and stores it
        // under DownloadHtmlTask's dirPath/fileName.
        new Thread(new DownloadHtmlTask(url)).start();
    }

    /** Runnable that fetches one URL over HTTP and writes the body to disk as UTF-8. */
    static class DownloadHtmlTask implements Runnable {
        private String url;
        String fileName = "hao123.html";
        String dirPath = "d:/lxj";

        public DownloadHtmlTask(String url) {
            this.url = url;
        }

        @Override
        public void run() {
            HttpURLConnection conn = null;
            try {
                URL htmlURL = new URL(url);
                URLConnection urlConnection = htmlURL.openConnection();
                conn = (HttpURLConnection) urlConnection;
                // FIX: check the response code BEFORE opening the body stream.
                // The original called getInputStream() first, which throws on
                // error responses and made the 200 check unreachable.
                if (conn.getResponseCode() != 200) {
                    System.err.println("HTTP error: " + conn.getResponseCode() + " for " + url);
                    return;
                }
                // Total length of the resource; -1 when the server does not send it.
                int contentLength = conn.getContentLength();
                // FIX: try-with-resources closes the streams the original leaked.
                try (InputStream is = conn.getInputStream();
                     ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                    byte[] buffer = new byte[20 * 1024]; // 20 KiB read buffer
                    int len;
                    int curLen = 0; // bytes received so far, for progress reporting
                    while ((len = is.read(buffer)) != -1) {
                        baos.write(buffer, 0, len);
                        curLen += len;
                        if (contentLength > 0) {
                            // FIX: original used integer division (curLen / contentLength),
                            // which printed 0% until the download finished. Widen to long
                            // to avoid overflow of curLen * 100 on large bodies.
                            int p = (int) (curLen * 100L / contentLength);
                            System.out.println("下载进度" + p + "%");
                        }
                    }
                    // Decode the downloaded bytes as UTF-8 and persist them.
                    String htmlContent = new String(baos.toByteArray(), "utf-8");
                    writerFile(htmlContent, dirPath, fileName);
                    System.out.println(htmlContent);
                }
            } catch (MalformedURLException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (conn != null) {
                    conn.disconnect(); // release the connection in all cases
                }
            }
        }

        /**
         * Writes htmlContent into dirPath/fileName as UTF-8.
         * FIX: the original used three separate try blocks and NPE'd on
         * fileWriter.write/close when FileWriter construction failed; it also
         * wrote with the platform charset while the content was decoded as UTF-8,
         * and failed outright when the target directory did not exist.
         */
        private void writerFile(String htmlContent, String dirPath, String fileName) {
            File dir = new File(dirPath);
            if (!dir.exists()) {
                dir.mkdirs(); // create the output directory on first run
            }
            try (Writer writer = new OutputStreamWriter(
                    new FileOutputStream(new File(dir, fileName)), "utf-8")) {
                writer.write(htmlContent);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Writes content into dirPath/fileName as UTF-8 bytes,
     * creating the directory if it does not exist.
     */
    public static void outputFile(String content, String dirPath, String fileName) {
        File dir = new File(dirPath);
        if (!dir.exists()) {
            dir.mkdirs(); // FIX: original threw FileNotFoundException when dirPath was missing
        }
        // FIX: try-with-resources guarantees the stream is closed even if write() throws.
        try (FileOutputStream fileOutputStream = new FileOutputStream(new File(dir, fileName))) {
            fileOutputStream.write(content.getBytes("utf-8"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
简单爬虫 从指定地址下载网站内容
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。