首页 > 代码库 > httpClient如何接收格式错误的响应头部信息
httpClient如何接收格式错误的响应头部信息
Exception in thread "main" org.apache.commons.httpclient.ProtocolException: Unable to parse header: share memory not exist, need create new share memory!
    at org.apache.commons.httpclient.HttpParser.parseHeaders(HttpParser.java:202)
    at org.apache.commons.httpclient.HttpMethodBase.readResponseHeaders(HttpMethodBase.java:1935)
    at org.apache.commons.httpclient.HttpMethodBase.readResponse(HttpMethodBase.java:1737)
    at org.apache.commons.httpclient.HttpMethodBase.execute(HttpMethodBase.java:1098)
    at org.apache.commons.httpclient.HttpMethodDirector.executeWithRetry(HttpMethodDirector.java:398)
    at org.apache.commons.httpclient.HttpMethodDirector.executeMethod(HttpMethodDirector.java:171)
    at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:397)
    at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:323)
做网页爬虫的时候, 模拟Get请求, 访问网址, 显示上面的错误异常。 问了3个前辈,都说没遇到过, 这可头疼屎了。
主要也不知道是什么问题, 有人说可能是buffer太小, 把大小设置大点。花了一个晚上查网上资料, 终于有点眉目, 见参考网址: http://bbs.csdn.net/topics/390178589
/**
 *
 */
package com.http;

import java.io.IOException;

import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseFactory;
import org.apache.http.HttpVersion;
import org.apache.http.ParseException;
import org.apache.http.conn.ClientConnectionOperator;
import org.apache.http.conn.OperatedClientConnection;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.impl.conn.BasicClientConnectionManager;
import org.apache.http.impl.conn.DefaultClientConnection;
import org.apache.http.impl.conn.DefaultClientConnectionOperator;
import org.apache.http.impl.conn.DefaultHttpResponseParser;
import org.apache.http.io.HttpMessageParser;
import org.apache.http.io.SessionInputBuffer;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicHttpResponse;
import org.apache.http.message.BasicLineParser;
import org.apache.http.message.BasicStatusLine;
import org.apache.http.message.LineParser;
import org.apache.http.params.HttpParams;
import org.apache.http.util.CharArrayBuffer;

/**
 * Connection manager whose connections tolerate malformed response heads.
 *
 * <p>It wires in a connection operator that creates {@link MyClientConnection}
 * instances; those connections parse responses with
 * {@link MyDefaultHttpResponseParser} and {@link MyLineParser}, which replace
 * unparseable status lines and header lines with placeholders instead of
 * failing the request.
 *
 * @author yingzi
 */
public class MyBasicClientConnectionManager extends BasicClientConnectionManager {

    public MyBasicClientConnectionManager() {
        super();
    }

    /**
     * Supplies the lenient connection operator instead of the default one.
     *
     * @param sr scheme registry handed to the operator
     * @return a {@link MyClientConnectionOperator} bound to {@code sr}
     */
    @Override
    protected ClientConnectionOperator createConnectionOperator(final SchemeRegistry sr) {
        return new MyClientConnectionOperator(sr);
    }

    /** Connection that parses responses with the lenient parser and line parser. */
    class MyClientConnection extends DefaultClientConnection {

        @Override
        protected HttpMessageParser createResponseParser(
                final SessionInputBuffer buffer,
                final HttpResponseFactory responseFactory,
                final HttpParams params) {
            return new MyDefaultHttpResponseParser(
                    buffer, new MyLineParser(), responseFactory, params);
        }
    }

    /** Response parser that substitutes a placeholder when the response head is invalid. */
    class MyDefaultHttpResponseParser extends DefaultHttpResponseParser {

        public MyDefaultHttpResponseParser(
                SessionInputBuffer buffer,
                LineParser parser,
                HttpResponseFactory responseFactory,
                HttpParams params) {
            super(buffer, parser, responseFactory, params);
        }

        /**
         * Parses the response head, falling back to an empty "HTTP/1.1 200"
         * response when the head cannot be parsed.
         *
         * <p>Only parse/protocol failures are suppressed. I/O failures are
         * deliberately propagated: turning a broken connection into a fake
         * 200 response would hide network errors from the caller.
         *
         * @param sessionBuffer buffer the response head is read from
         * @return the parsed response, or a placeholder 200 response
         * @throws IOException if reading from the connection fails
         * @throws HttpException declared by the overridden method; protocol
         *         errors raised by the default parser are suppressed here
         */
        @Override
        protected HttpResponse parseHead(final SessionInputBuffer sessionBuffer)
                throws IOException, HttpException {
            try {
                return super.parseHead(sessionBuffer);
            } catch (HttpException ignored) {
                // Protocol-level failure while locating the status line.
                return placeholderResponse();
            } catch (ParseException ignored) {
                // Status line present but malformed.
                return placeholderResponse();
            }
        }

        /** Builds the stand-in response used when the real head is unusable. */
        private HttpResponse placeholderResponse() {
            return new BasicHttpResponse(
                    new BasicStatusLine(HttpVersion.HTTP_1_1, 200, ""));
        }
    }

    /** Connection operator that hands out {@link MyClientConnection} instances. */
    class MyClientConnectionOperator extends DefaultClientConnectionOperator {

        public MyClientConnectionOperator(final SchemeRegistry sr) {
            super(sr);
        }

        @Override
        public OperatedClientConnection createConnection() {
            return new MyClientConnection();
        }
    }

    /** Line parser that keeps unparseable header lines under the name "invalid". */
    class MyLineParser extends BasicLineParser {

        /**
         * Parses one header line; a line that cannot be parsed is returned as
         * a header named {@code "invalid"} whose value is the raw line text.
         *
         * @param buffer the raw header line
         * @return the parsed header, or the {@code "invalid"} placeholder
         */
        @Override
        public Header parseHeader(final CharArrayBuffer buffer) {
            try {
                return super.parseHeader(buffer);
            } catch (ParseException ignored) {
                // Keep the offending text so callers can still inspect it.
                return new BasicHeader("invalid", buffer.toString());
            }
        }
    }
}
而我用的是MultiThreadedHttpConnectionManager, 上面的做法不怎么适用。于是我看了日志里的异常trace, 查看了httpClient具体的execute方法里的代码, 瞎猜了一下, 觉得要把原来的GetMethod换成下面这个自定义的MyHttpGetMethod:
/**
 *
 */
package com.http;

import java.io.IOException;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpConnection;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpParser;
import org.apache.commons.httpclient.HttpState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * GET method that survives malformed response headers.
 *
 * <p>When the response headers cannot be parsed, a fixed set of fallback
 * headers is installed instead of failing the request.
 *
 * @author yingzi
 */
public class MyHttpGetMethod extends org.apache.commons.httpclient.methods.GetMethod {

    private static final Logger log = LoggerFactory.getLogger(MyHttpGetMethod.class);

    public MyHttpGetMethod() {
        super();
    }

    public MyHttpGetMethod(String url) {
        super(url);
    }

    /**
     * Reads the response headers from the connection, substituting fallback
     * headers when they are malformed.
     *
     * <p>Only protocol-level failures ({@link HttpException}) are suppressed.
     * Other I/O failures are propagated so that a broken connection is not
     * mistaken for a successful response with made-up headers.
     *
     * @param state the HTTP state (not used by this override)
     * @param conn  connection whose response stream holds the headers
     * @throws IOException   if reading from the connection fails
     * @throws HttpException declared by the overridden method; header parse
     *                       failures are suppressed here
     */
    @Override
    protected void readResponseHeaders(HttpState state, HttpConnection conn)
            throws IOException, HttpException {
        getResponseHeaderGroup().clear();

        Header[] headers;
        try {
            headers = HttpParser.parseHeaders(
                    conn.getResponseInputStream(),
                    getParams().getHttpElementCharset());
        } catch (HttpException ex) {
            // NOTE(review): presumably the unread remainder of the header
            // section stays in the stream after a failure - confirm how the
            // body reader copes with that.
            log.warn("response header has some error info , can not parse normally.", ex);
            headers = fallbackHeaders();
        }

        // Wire logging moved to HttpParser
        getResponseHeaderGroup().setHeaders(headers);
    }

    /** Builds a fresh copy of the headers assumed when the real ones are unparseable. */
    private static Header[] fallbackHeaders() {
        return new Header[] {
            new Header("Connection", "Keep-Alive"),
            new Header("Content-Type", "text/html; charset=GB18030"),
            new Header("Keep-Alive", "timeout=20"),
            new Header("Cache-control", "max-age=3600")
        };
    }
}
httpClient如何接收格式错误的响应头部信息
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。