首页 > 代码库 > HttpClient+jsoup登录+解析 163邮箱
HttpClient+jsoup登录+解析 163邮箱
找了几个,只有这个靠谱,用的是httpclient4,另外还需要commons-lang、slf4j和jsoup包
http://jsoup.org/
http://www.oschina.net/code/snippet_128625_12592?p=2
————————————————————————————————————————————————————————————
如题:
只用jsoup解析页面非常方便,但是用jsoup做登录就比较麻烦,反正我不知道怎么做。
HttpClient做登录比较方便,因此用HttpClient模拟登录获取html内容,再用jsoup做解析,是一个非常完美的组合。
替换自己的163邮箱看一下吧。
只用jsoup解析页面非常方便,但是用jsoup做登录就比较麻烦,反正我不知道怎么做。
HttpClient做登录比较方便,因此用HttpClient模拟登录获取html内容,再用jsoup做解析,是一个非常完美的组合。
替换自己的163邮箱看一下吧。
HttpClientHelper 封装
import java.io.IOException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Thin wrapper around Apache HttpClient 4.
 *
 * <p>All requests issued through one instance share a single {@link HttpContext} and
 * cookie store, so cookies set by one response (for example a login) are sent with the
 * following requests.
 *
 * @author bangis.wangdf
 */
public class HttpClientHelper {
    private static final Logger LOG = LoggerFactory.getLogger(HttpClientHelper.class);

    /** Socket (read) timeout in seconds; applied through the "http.socket.timeout" parameter. */
    private static final int TIME_OUT = 3;

    /** User-Agent sent by {@link #get(String, Header...)} when the caller passes no headers. */
    private static final String DEFAULT_USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; InfoPath.2)";

    private final HttpClient httpclient = new DefaultHttpClient();
    private final HttpContext localContext = new BasicHttpContext();
    /** Cookie store shared by every request of this helper (keeps the login state). */
    private final BasicCookieStore basicCookieStore = new BasicCookieStore();

    /** Creates a helper with the default scheme registry of {@link DefaultHttpClient}. */
    public HttpClientHelper() {
        instance();
    }

    /**
     * Creates a helper, optionally registering a trust-all "https" scheme.
     *
     * @param ssl {@code true} to register an https scheme that accepts any certificate;
     *            {@code false} behaves exactly like the no-argument constructor
     */
    public HttpClientHelper(boolean ssl) {
        instance();
        if (ssl) {
            registerTrustAllHttps();
        }
    }

    /** Applies the socket timeout and attaches the cookie store to the shared context. */
    private void instance() {
        httpclient.getParams().setIntParameter("http.socket.timeout", TIME_OUT * 1000);
        localContext.setAttribute("http.cookie-store", basicCookieStore);
    }

    /**
     * Registers an "https" scheme on port 443 whose trust manager performs no checks.
     *
     * <p>SECURITY NOTE(review): this disables certificate validation entirely and therefore
     * allows man-in-the-middle attacks. It is kept because callers rely on it, but it should
     * not be used outside of experiments.
     */
    private void registerTrustAllHttps() {
        try {
            X509TrustManager tm = new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                    // intentionally accepts every client certificate
                }

                @Override
                public void checkServerTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                    // intentionally accepts every server certificate
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    // The interface contract requires a non-null (possibly empty) array.
                    return new X509Certificate[0];
                }
            };
            SSLContext ctx = SSLContext.getInstance("TLS");
            ctx.init(null, new TrustManager[] { tm }, null);
            SSLSocketFactory ssf = new SSLSocketFactory(ctx);
            ClientConnectionManager ccm = httpclient.getConnectionManager();
            SchemeRegistry sr = ccm.getSchemeRegistry();
            sr.register(new Scheme("https", ssf, 443));
        } catch (Exception e) {
            // Best effort: the helper stays usable with the default https scheme.
            LOG.error(" ssl init ", e);
        }
    }

    /**
     * Executes a GET request.
     *
     * @param url     the address to fetch
     * @param headers request headers; when none are given a default User-Agent is sent
     * @return the wrapped response, or {@link HttpResult#empty()} when an I/O error occurred
     */
    public HttpResult get(String url, Header... headers) {
        HttpGet httpget = new HttpGet(url);
        // A varargs call without headers yields an empty array, not null, so test both.
        if (headers != null && headers.length > 0) {
            for (Header h : headers) {
                httpget.addHeader(h);
            }
        } else {
            httpget.addHeader(new BasicHeader("User-Agent", DEFAULT_USER_AGENT));
        }
        HttpResult httpResult = HttpResult.empty();
        try {
            HttpResponse response = httpclient.execute(httpget, localContext);
            httpResult = new HttpResult(localContext, response);
        } catch (IOException e) {
            LOG.error(" get ", e);
            httpget.abort();
        }
        return httpResult;
    }

    /**
     * Executes a POST request with a UTF-8 URL-encoded form body.
     *
     * @param url     the address to post to
     * @param data    form fields; {@code null} is treated as an empty form
     * @param headers request headers; headers whose name starts with "$x-param" are not sent,
     *                and a "Content-Type" header overrides the default form content type
     * @return the wrapped response, or {@link HttpResult#empty()} when an I/O error occurred
     */
    public HttpResult post(String url, Map<String, String> data, Header... headers) {
        HttpPost httppost = new HttpPost(url);
        String contentType = null;
        if (headers != null) {
            for (Header h : headers) {
                if (!h.getName().startsWith("$x-param")) {
                    httppost.addHeader(h);
                }
                if ("Content-Type".equalsIgnoreCase(h.getName())) {
                    contentType = h.getValue();
                }
            }
        }
        if (contentType != null) {
            httppost.setHeader("Content-Type", contentType);
        } else if (data != null) {
            httppost.setHeader("Content-Type", "application/x-www-form-urlencoded");
        }
        List<NameValuePair> formParams = new ArrayList<NameValuePair>();
        if (data != null) {
            for (Map.Entry<String, String> field : data.entrySet()) {
                formParams.add(new BasicNameValuePair(field.getKey(), field.getValue()));
            }
        }
        HttpResult httpResult = HttpResult.empty();
        try {
            httppost.setEntity(new UrlEncodedFormEntity(formParams, "UTF-8"));
            HttpResponse response = httpclient.execute(httppost, localContext);
            httpResult = new HttpResult(localContext, response);
        } catch (IOException e) {
            LOG.error(" post ", e);
            httppost.abort();
        }
        return httpResult;
    }

    /**
     * Looks up a cookie value in the shared cookie store.
     *
     * @param name   cookie name
     * @param domain optional; when given, only the first element is used and the cookie's
     *               domain must equal it. When omitted, the first cookie with the given
     *               name is returned regardless of its domain.
     * @return the cookie value, or {@code null} when no cookie matches
     */
    public String getCookie(String name, String... domain) {
        boolean anyDomain = domain == null || domain.length == 0;
        String wantedDomain = anyDomain ? null : domain[0];
        for (Cookie c : basicCookieStore.getCookies()) {
            if (!StringUtils.equals(name, c.getName())) {
                continue;
            }
            if (anyDomain || StringUtils.equals(wantedDomain, c.getDomain())) {
                return c.getValue();
            }
        }
        return null;
    }

    /** Prints every cookie of the shared cookie store to standard output. */
    public void printCookieAll() {
        for (Cookie c : basicCookieStore.getCookies()) {
            System.out.println(c);
        }
    }

    /**
     * Misspelled original name, kept so existing callers continue to compile.
     *
     * @deprecated use {@link #printCookieAll()}
     */
    @Deprecated
    public void pringCookieAll() {
        printCookieAll();
    }
}
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Thin wrapper around Apache HttpClient 4.
 *
 * <p>All requests issued through one instance share a single {@link HttpContext} and
 * cookie store, so cookies set by one response (for example a login) are sent with the
 * following requests.
 *
 * @author bangis.wangdf
 */
public class HttpClientHelper {
    private static final Logger LOG = LoggerFactory.getLogger(HttpClientHelper.class);

    /** Socket (read) timeout in seconds; applied through the "http.socket.timeout" parameter. */
    private static final int TIME_OUT = 3;

    /** User-Agent sent by {@link #get(String, Header...)} when the caller passes no headers. */
    private static final String DEFAULT_USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; InfoPath.2)";

    private final HttpClient httpclient = new DefaultHttpClient();
    private final HttpContext localContext = new BasicHttpContext();
    /** Cookie store shared by every request of this helper (keeps the login state). */
    private final BasicCookieStore basicCookieStore = new BasicCookieStore();

    /** Creates a helper with the default scheme registry of {@link DefaultHttpClient}. */
    public HttpClientHelper() {
        instance();
    }

    /**
     * Creates a helper, optionally registering a trust-all "https" scheme.
     *
     * @param ssl {@code true} to register an https scheme that accepts any certificate;
     *            {@code false} behaves exactly like the no-argument constructor
     */
    public HttpClientHelper(boolean ssl) {
        instance();
        if (ssl) {
            registerTrustAllHttps();
        }
    }

    /** Applies the socket timeout and attaches the cookie store to the shared context. */
    private void instance() {
        httpclient.getParams().setIntParameter("http.socket.timeout", TIME_OUT * 1000);
        localContext.setAttribute("http.cookie-store", basicCookieStore);
    }

    /**
     * Registers an "https" scheme on port 443 whose trust manager performs no checks.
     *
     * <p>SECURITY NOTE(review): this disables certificate validation entirely and therefore
     * allows man-in-the-middle attacks. It is kept because callers rely on it, but it should
     * not be used outside of experiments.
     */
    private void registerTrustAllHttps() {
        try {
            X509TrustManager tm = new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                    // intentionally accepts every client certificate
                }

                @Override
                public void checkServerTrusted(X509Certificate[] xcs, String string) throws CertificateException {
                    // intentionally accepts every server certificate
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    // The interface contract requires a non-null (possibly empty) array.
                    return new X509Certificate[0];
                }
            };
            SSLContext ctx = SSLContext.getInstance("TLS");
            ctx.init(null, new TrustManager[] { tm }, null);
            SSLSocketFactory ssf = new SSLSocketFactory(ctx);
            ClientConnectionManager ccm = httpclient.getConnectionManager();
            SchemeRegistry sr = ccm.getSchemeRegistry();
            sr.register(new Scheme("https", ssf, 443));
        } catch (Exception e) {
            // Best effort: the helper stays usable with the default https scheme.
            LOG.error(" ssl init ", e);
        }
    }

    /**
     * Executes a GET request.
     *
     * @param url     the address to fetch
     * @param headers request headers; when none are given a default User-Agent is sent
     * @return the wrapped response, or {@link HttpResult#empty()} when an I/O error occurred
     */
    public HttpResult get(String url, Header... headers) {
        HttpGet httpget = new HttpGet(url);
        // A varargs call without headers yields an empty array, not null, so test both.
        if (headers != null && headers.length > 0) {
            for (Header h : headers) {
                httpget.addHeader(h);
            }
        } else {
            httpget.addHeader(new BasicHeader("User-Agent", DEFAULT_USER_AGENT));
        }
        HttpResult httpResult = HttpResult.empty();
        try {
            HttpResponse response = httpclient.execute(httpget, localContext);
            httpResult = new HttpResult(localContext, response);
        } catch (java.io.IOException e) {
            // Fully qualified: the import list preceding this listing has no java.io.IOException.
            LOG.error(" get ", e);
            httpget.abort();
        }
        return httpResult;
    }

    /**
     * Executes a POST request with a UTF-8 URL-encoded form body.
     *
     * @param url     the address to post to
     * @param data    form fields; {@code null} is treated as an empty form
     * @param headers request headers; headers whose name starts with "$x-param" are not sent,
     *                and a "Content-Type" header overrides the default form content type
     * @return the wrapped response, or {@link HttpResult#empty()} when an I/O error occurred
     */
    public HttpResult post(String url, Map<String, String> data, Header... headers) {
        HttpPost httppost = new HttpPost(url);
        String contentType = null;
        if (headers != null) {
            for (Header h : headers) {
                if (!h.getName().startsWith("$x-param")) {
                    httppost.addHeader(h);
                }
                if ("Content-Type".equalsIgnoreCase(h.getName())) {
                    contentType = h.getValue();
                }
            }
        }
        if (contentType != null) {
            httppost.setHeader("Content-Type", contentType);
        } else if (data != null) {
            httppost.setHeader("Content-Type", "application/x-www-form-urlencoded");
        }
        List<NameValuePair> formParams = new ArrayList<NameValuePair>();
        if (data != null) {
            for (Map.Entry<String, String> field : data.entrySet()) {
                formParams.add(new BasicNameValuePair(field.getKey(), field.getValue()));
            }
        }
        HttpResult httpResult = HttpResult.empty();
        try {
            httppost.setEntity(new UrlEncodedFormEntity(formParams, "UTF-8"));
            HttpResponse response = httpclient.execute(httppost, localContext);
            httpResult = new HttpResult(localContext, response);
        } catch (java.io.IOException e) {
            LOG.error(" post ", e);
            httppost.abort();
        }
        return httpResult;
    }

    /**
     * Looks up a cookie value in the shared cookie store.
     *
     * @param name   cookie name
     * @param domain optional; when given, only the first element is used and the cookie's
     *               domain must equal it. When omitted, the first cookie with the given
     *               name is returned regardless of its domain.
     * @return the cookie value, or {@code null} when no cookie matches
     */
    public String getCookie(String name, String... domain) {
        boolean anyDomain = domain == null || domain.length == 0;
        String wantedDomain = anyDomain ? null : domain[0];
        for (Cookie c : basicCookieStore.getCookies()) {
            if (!StringUtils.equals(name, c.getName())) {
                continue;
            }
            if (anyDomain || StringUtils.equals(wantedDomain, c.getDomain())) {
                return c.getValue();
            }
        }
        return null;
    }

    /** Prints every cookie of the shared cookie store to standard output. */
    public void printCookieAll() {
        for (Cookie c : basicCookieStore.getCookies()) {
            System.out.println(c);
        }
    }

    /**
     * Misspelled original name, kept so existing callers continue to compile.
     *
     * @deprecated use {@link #printCookieAll()}
     */
    @Deprecated
    public void pringCookieAll() {
        printCookieAll();
    }
}
对HttpClient返回的结果进一步封装
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* 对HttpClient返回的结果进一步封装
* @author bangis.wangdf
*
*/
public class HttpResult {
private static Logger LOG = LoggerFactory.getLogger(HttpResult.class);
private static Pattern headerCharsetPattern = Pattern.compile(
"charset=((gb2312)|(gbk)|(utf-8))", 2);
private static Pattern pattern = Pattern
.compile(
"<meta[^>]*content=([‘\"])?[^>]*charset=((gb2312)|(gbk)|(utf-8))\\1[^>]*>",
2);
private String headerCharset;
private String headerContentType;
private String headerContentEncoding;
private List<Header> headers;
private String metaCharset;
private byte[] response;
private String responseUrl;
private int statuCode = -1;
private static final int BUFFER_SIZE = 4096;
public static HttpResult empty() {
return new HttpResult();
}
public String getHeaderCharset() {
return this.headerCharset;
}
public String getHeaderContentType() {
return this.headerContentType;
}
public final List<Header> getHeaders() {
return this.headers;
}
public String getHtml() {
try {
return getText();
} catch (UnsupportedEncodingException e) {
LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
}
return "";
}
public String getHtml(String encoding) {
try {
return getText(encoding);
} catch (UnsupportedEncodingException e) {
LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
}
return "";
}
public String getMetaCharset() {
return this.metaCharset;
}
public byte[] getResponse() {
return Arrays.copyOf(this.response, this.response.length);
}
public String getResponseUrl() {
return this.responseUrl;
}
public int getStatuCode() {
return this.statuCode;
}
public String getText() throws UnsupportedEncodingException {
return getText("");
}
public String getText(String encoding) throws UnsupportedEncodingException {
if (this.response == null){
return "";
}
String encodingStr = encoding;
if (StringUtils.isBlank(encoding)){
encodingStr = this.metaCharset;
}
if (StringUtils.isBlank(encoding)){
encodingStr = this.headerCharset;
}
if (StringUtils.isBlank(encoding)){
encodingStr = "UTF-8";
}
return new String(this.response, encodingStr);
}
private String getCharsetFromMeta() {
StringBuilder builder = new StringBuilder();
String charset = "";
for (int i = 0; (i < this.response.length) && ("".equals(charset)); ++i) {
char c = (char) this.response[i];
switch (c) {
case ‘<‘:
builder.delete(0, builder.length());
builder.append(c);
break;
case ‘>‘:
if (builder.length() > 0){
builder.append(c);
}
String meta = builder.toString();
if (meta.toLowerCase().startsWith("<meta")){
charset = getCharsetFromMeta(meta);
}
break;
case ‘=‘:
default:
if (builder.length() > 0){
builder.append(c);
}
}
}
return charset;
}
private String getCharsetFromMeta(String meta) {
if (StringUtils.isBlank(meta)){
return "";
}
Matcher m = pattern.matcher(meta);
if (m.find()){
return m.group(2);
}
return "";
}
private void getHttpHeaders(HttpResponse httpResponse) {
String headerName = "";
String headerValue = "";
int index = -1;
Header[] rspHeaders = httpResponse.getAllHeaders();
for (int i = 0; i < rspHeaders.length; ++i) {
Header header = rspHeaders[i];
this.headers.add(header);
headerName = header.getName();
if ("Content-Type".equalsIgnoreCase(headerName)) {
headerValue = header.getValue();
index = headerValue.indexOf(‘;‘);
if (index > 0){
this.headerContentType = headerValue.substring(0, index);
}
Matcher m = headerCharsetPattern.matcher(headerValue);
if (m.find()){
this.headerCharset = m.group(1);
}
}
if ("Content-Encoding".equalsIgnoreCase(headerName)){
this.headerContentEncoding = header.getValue();
}
}
}
private void getResponseUrl(HttpContext httpContext) {
HttpHost target = (HttpHost) httpContext
.getAttribute("http.target_host");
HttpUriRequest req = (HttpUriRequest) httpContext
.getAttribute("http.request");
this.responseUrl = target.toString() + req.getURI().toString();
}
public HttpResult(HttpContext httpContext, HttpResponse httpResponse) {
this.headers = new ArrayList<Header>();
this.statuCode = httpResponse.getStatusLine().getStatusCode();
if (httpContext != null) {
getResponseUrl(httpContext);
}
if (httpResponse != null) {
getHttpHeaders(httpResponse);
try {
if (("gzip".equalsIgnoreCase(this.headerContentEncoding))
|| ("deflate".equalsIgnoreCase(this.headerContentEncoding))) {
GZIPInputStream is = new GZIPInputStream(httpResponse.getEntity().getContent());
ByteArrayOutputStream os = new ByteArrayOutputStream();
byte[] buffer = new byte[BUFFER_SIZE];
int count = 0;
while ((count = is.read(buffer)) > 0){
os.write(buffer, 0, count);
}
this.response = os.toByteArray();
os.close();
is.close();
}else{
this.response = EntityUtils.toByteArray(httpResponse.getEntity());
}
} catch (Exception e) {
LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
}
if (this.response != null){
this.metaCharset = getCharsetFromMeta();
}
}
}
private HttpResult() {
}
}
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* 对HttpClient返回的结果进一步封装
* @author bangis.wangdf
*
*/
public class HttpResult {
private static Logger LOG = LoggerFactory.getLogger(HttpResult.class);
private static Pattern headerCharsetPattern = Pattern.compile(
"charset=((gb2312)|(gbk)|(utf-8))", 2);
private static Pattern pattern = Pattern
.compile(
"<meta[^>]*content=([‘\"])?[^>]*charset=((gb2312)|(gbk)|(utf-8))\\1[^>]*>",
2);
private String headerCharset;
private String headerContentType;
private String headerContentEncoding;
private List<Header> headers;
private String metaCharset;
private byte[] response;
private String responseUrl;
private int statuCode = -1;
private static final int BUFFER_SIZE = 4096;
public static HttpResult empty() {
return new HttpResult();
}
public String getHeaderCharset() {
return this.headerCharset;
}
public String getHeaderContentType() {
return this.headerContentType;
}
public final List<Header> getHeaders() {
return this.headers;
}
public String getHtml() {
try {
return getText();
} catch (UnsupportedEncodingException e) {
LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
}
return "";
}
public String getHtml(String encoding) {
try {
return getText(encoding);
} catch (UnsupportedEncodingException e) {
LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
}
return "";
}
public String getMetaCharset() {
return this.metaCharset;
}
public byte[] getResponse() {
return Arrays.copyOf(this.response, this.response.length);
}
public String getResponseUrl() {
return this.responseUrl;
}
public int getStatuCode() {
return this.statuCode;
}
public String getText() throws UnsupportedEncodingException {
return getText("");
}
public String getText(String encoding) throws UnsupportedEncodingException {
if (this.response == null){
return "";
}
String encodingStr = encoding;
if (StringUtils.isBlank(encoding)){
encodingStr = this.metaCharset;
}
if (StringUtils.isBlank(encoding)){
encodingStr = this.headerCharset;
}
if (StringUtils.isBlank(encoding)){
encodingStr = "UTF-8";
}
return new String(this.response, encodingStr);
}
private String getCharsetFromMeta() {
StringBuilder builder = new StringBuilder();
String charset = "";
for (int i = 0; (i < this.response.length) && ("".equals(charset)); ++i) {
char c = (char) this.response[i];
switch (c) {
case ‘<‘:
builder.delete(0, builder.length());
builder.append(c);
break;
case ‘>‘:
if (builder.length() > 0){
builder.append(c);
}
String meta = builder.toString();
if (meta.toLowerCase().startsWith("<meta")){
charset = getCharsetFromMeta(meta);
}
break;
case ‘=‘:
default:
if (builder.length() > 0){
builder.append(c);
}
}
}
return charset;
}
private String getCharsetFromMeta(String meta) {
if (StringUtils.isBlank(meta)){
return "";
}
Matcher m = pattern.matcher(meta);
if (m.find()){
return m.group(2);
}
return "";
}
private void getHttpHeaders(HttpResponse httpResponse) {
String headerName = "";
String headerValue = "";
int index = -1;
Header[] rspHeaders = httpResponse.getAllHeaders();
for (int i = 0; i < rspHeaders.length; ++i) {
Header header = rspHeaders[i];
this.headers.add(header);
headerName = header.getName();
if ("Content-Type".equalsIgnoreCase(headerName)) {
headerValue = header.getValue();
index = headerValue.indexOf(‘;‘);
if (index > 0){
this.headerContentType = headerValue.substring(0, index);
}
Matcher m = headerCharsetPattern.matcher(headerValue);
if (m.find()){
this.headerCharset = m.group(1);
}
}
if ("Content-Encoding".equalsIgnoreCase(headerName)){
this.headerContentEncoding = header.getValue();
}
}
}
private void getResponseUrl(HttpContext httpContext) {
HttpHost target = (HttpHost) httpContext
.getAttribute("http.target_host");
HttpUriRequest req = (HttpUriRequest) httpContext
.getAttribute("http.request");
this.responseUrl = target.toString() + req.getURI().toString();
}
public HttpResult(HttpContext httpContext, HttpResponse httpResponse) {
this.headers = new ArrayList<Header>();
this.statuCode = httpResponse.getStatusLine().getStatusCode();
if (httpContext != null) {
getResponseUrl(httpContext);
}
if (httpResponse != null) {
getHttpHeaders(httpResponse);
try {
if (("gzip".equalsIgnoreCase(this.headerContentEncoding))
|| ("deflate".equalsIgnoreCase(this.headerContentEncoding))) {
GZIPInputStream is = new GZIPInputStream(httpResponse.getEntity().getContent());
ByteArrayOutputStream os = new ByteArrayOutputStream();
byte[] buffer = new byte[BUFFER_SIZE];
int count = 0;
while ((count = is.read(buffer)) > 0){
os.write(buffer, 0, count);
}
this.response = os.toByteArray();
os.close();
is.close();
}else{
this.response = EntityUtils.toByteArray(httpResponse.getEntity());
}
} catch (Exception e) {
LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
}
if (this.response != null){
this.metaCharset = getCharsetFromMeta();
}
}
}
private HttpResult() {
}
}
Mail163Test
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.message.BasicHeader;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Demo: logs in to 163 webmail with {@code HttpClientHelper}, extracts the session id from
 * the login response with jsoup and prints the raw inbox listing response.
 */
public class Mail163Test {
    public static final String SESSION_INIT = "http://mail.163.com";
    public static final String LOGIN_URL = "https://ssl.mail.163.com/entry/coremail/fcg/ntesdoor2?df=webmail163&from=web&funcid=loginone&iframe=1&language=-1&net=t&passtype=1&product=mail163&race=-2_-2_-2_db&style=-1&uid=";
    public static final String MAIL_LIST_URL = "http://twebmail.mail.163.com/js4/s?sid={0}&func=mbox:listMessages";

    private static final String USER_AGENT = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";

    /** Request body of the listMessages call (folder 1, newest first, 50 entries). */
    private static final String LIST_MESSAGES_REQUEST = "<?xml version=\"1.0\"?><object><int name=\"fid\">1</int><boolean name=\"skipLockedFolders\">false</boolean><string name=\"order\">date</string><boolean name=\"desc\">true</boolean><int name=\"start\">0</int><int name=\"limit\">50</int><boolean name=\"topFirst\">true</boolean><boolean name=\"returnTotal\">true</boolean><boolean name=\"returnTag\">true</boolean></object>";

    /**
     * @param args optional: {@code args[0]} is the user name and {@code args[1]} the
     *             password; the built-in placeholders are used for missing values
     */
    public static void main(String[] args) {
        String username = (args != null && args.length > 0) ? args[0] : "bangis";
        String password = (args != null && args.length > 1) ? args[1] : "*******";

        HttpClientHelper hc = new HttpClientHelper(true);
        // Visit the start page first; presumably this collects the initial session cookies.
        hc.get(SESSION_INIT);

        // Assemble the login form.
        Map<String, String> data = new HashMap<String, String>();
        data.put("url2", "http://mail.163.com/errorpage/err_163.htm");
        data.put("savelogin", "0");
        data.put("username", username);
        data.put("password", password);
        HttpResult lr = hc.post(LOGIN_URL, data, setHeader()); // perform the login

        String sessionId = extractSessionId(lr.getHtml());
        if (sessionId == null) {
            System.err.println("Login failed: no session id found in the login response (HTTP status "
                    + lr.getStatuCode() + ")");
            return;
        }

        data.clear();
        data.put("var", LIST_MESSAGES_REQUEST);
        lr = hc.post(MessageFormat.format(MAIL_LIST_URL, sessionId),
                data, setQueryHeader(sessionId)); // fetch the inbox listing
        System.out.println(lr.getHtml());
    }

    /**
     * Extracts the session id from the script of the login response: the text after the
     * second '=' of the combined script content, minus its last two characters.
     *
     * @return the session id, or {@code null} when the page does not have the expected shape
     */
    private static String extractSessionId(String html) {
        Document doc = Jsoup.parse(html);
        String[] parts = doc.select("script").html().split("=");
        if (parts.length < 3 || parts[2].length() <= 2) {
            return null;
        }
        return parts[2].substring(0, parts[2].length() - 2);
    }

    /** Builds the browser-like header set shared by both requests. */
    private static Header[] browserHeaders(String host, String referer, String accept) {
        return new Header[] {
            new BasicHeader("User-Agent", USER_AGENT),
            new BasicHeader("Accept-Encoding", "gzip, deflate"),
            new BasicHeader("Accept-Language", "zh-CN"),
            new BasicHeader("Cache-Control", "no-cache"),
            new BasicHeader("Connection", "Keep-Alive"),
            new BasicHeader("Content-Type", "application/x-www-form-urlencoded"),
            new BasicHeader("Host", host),
            new BasicHeader("Referer", referer),
            new BasicHeader("Accept", accept)
        };
    }

    /** @return the headers sent with the login request */
    public static Header[] setHeader() {
        return browserHeaders("ssl.mail.163.com", "http://mail.163.com/",
                "text/html, application/xhtml+xml, */*");
    }

    /**
     * @param sessionId session id placed in the Referer header
     * @return the headers sent with the inbox listing request
     */
    public static Header[] setQueryHeader(String sessionId) {
        return browserHeaders("twebmail.mail.163.com",
                "http://twebmail.mail.163.com/js4/index.jsp?sid=" + sessionId,
                "text/javascript");
    }
}
import java.util.HashMap;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.message.BasicHeader;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Demo: logs in to 163 webmail with {@code HttpClientHelper}, extracts the session id from
 * the login response with jsoup and prints the raw inbox listing response.
 */
public class Mail163Test {
    public static final String SESSION_INIT = "http://mail.163.com";
    public static final String LOGIN_URL = "https://ssl.mail.163.com/entry/coremail/fcg/ntesdoor2?df=webmail163&from=web&funcid=loginone&iframe=1&language=-1&net=t&passtype=1&product=mail163&race=-2_-2_-2_db&style=-1&uid=";
    public static final String MAIL_LIST_URL = "http://twebmail.mail.163.com/js4/s?sid={0}&func=mbox:listMessages";

    private static final String USER_AGENT = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";

    /** Request body of the listMessages call (folder 1, newest first, 50 entries). */
    private static final String LIST_MESSAGES_REQUEST = "<?xml version=\"1.0\"?><object><int name=\"fid\">1</int><boolean name=\"skipLockedFolders\">false</boolean><string name=\"order\">date</string><boolean name=\"desc\">true</boolean><int name=\"start\">0</int><int name=\"limit\">50</int><boolean name=\"topFirst\">true</boolean><boolean name=\"returnTotal\">true</boolean><boolean name=\"returnTag\">true</boolean></object>";

    /**
     * @param args optional: {@code args[0]} is the user name and {@code args[1]} the
     *             password; the built-in placeholders are used for missing values
     */
    public static void main(String[] args) {
        String username = (args != null && args.length > 0) ? args[0] : "bangis";
        String password = (args != null && args.length > 1) ? args[1] : "*******";

        HttpClientHelper hc = new HttpClientHelper(true);
        // Visit the start page first; presumably this collects the initial session cookies.
        hc.get(SESSION_INIT);

        // Assemble the login form.
        Map<String, String> data = new HashMap<String, String>();
        data.put("url2", "http://mail.163.com/errorpage/err_163.htm");
        data.put("savelogin", "0");
        data.put("username", username);
        data.put("password", password);
        HttpResult lr = hc.post(LOGIN_URL, data, setHeader()); // perform the login

        String sessionId = extractSessionId(lr.getHtml());
        if (sessionId == null) {
            System.err.println("Login failed: no session id found in the login response (HTTP status "
                    + lr.getStatuCode() + ")");
            return;
        }

        data.clear();
        data.put("var", LIST_MESSAGES_REQUEST);
        // Fully qualified: the import list preceding this listing has no java.text.MessageFormat.
        lr = hc.post(java.text.MessageFormat.format(MAIL_LIST_URL, sessionId),
                data, setQueryHeader(sessionId)); // fetch the inbox listing
        System.out.println(lr.getHtml());
    }

    /**
     * Extracts the session id from the script of the login response: the text after the
     * second '=' of the combined script content, minus its last two characters.
     *
     * @return the session id, or {@code null} when the page does not have the expected shape
     */
    private static String extractSessionId(String html) {
        Document doc = Jsoup.parse(html);
        String[] parts = doc.select("script").html().split("=");
        if (parts.length < 3 || parts[2].length() <= 2) {
            return null;
        }
        return parts[2].substring(0, parts[2].length() - 2);
    }

    /** Builds the browser-like header set shared by both requests. */
    private static Header[] browserHeaders(String host, String referer, String accept) {
        return new Header[] {
            new BasicHeader("User-Agent", USER_AGENT),
            new BasicHeader("Accept-Encoding", "gzip, deflate"),
            new BasicHeader("Accept-Language", "zh-CN"),
            new BasicHeader("Cache-Control", "no-cache"),
            new BasicHeader("Connection", "Keep-Alive"),
            new BasicHeader("Content-Type", "application/x-www-form-urlencoded"),
            new BasicHeader("Host", host),
            new BasicHeader("Referer", referer),
            new BasicHeader("Accept", accept)
        };
    }

    /** @return the headers sent with the login request */
    public static Header[] setHeader() {
        return browserHeaders("ssl.mail.163.com", "http://mail.163.com/",
                "text/html, application/xhtml+xml, */*");
    }

    /**
     * @param sessionId session id placed in the Referer header
     * @return the headers sent with the inbox listing request
     */
    public static Header[] setQueryHeader(String sessionId) {
        return browserHeaders("twebmail.mail.163.com",
                "http://twebmail.mail.163.com/js4/index.jsp?sid=" + sessionId,
                "text/javascript");
    }
}
HttpClient+jsoup登录+解析 163邮箱
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。