首页 > 代码库 > httpClient如何接收格式错误的响应头部信息

httpClient如何接收格式错误的响应头部信息

Exception in thread "main" org.apache.commons.httpclient.ProtocolException: Unable to parse header: share memory not exist, need create new share memory!
    at org.apache.commons.httpclient.HttpParser.parseHeaders(HttpParser.java:202)
    at org.apache.commons.httpclient.HttpMethodBase.readResponseHeaders(HttpMethodBase.java:1935)
    at org.apache.commons.httpclient.HttpMethodBase.readResponse(HttpMethodBase.java:1737)
    at org.apache.commons.httpclient.HttpMethodBase.execute(HttpMethodBase.java:1098)
    at org.apache.commons.httpclient.HttpMethodDirector.executeWithRetry(HttpMethodDirector.java:398)
    at org.apache.commons.httpclient.HttpMethodDirector.executeMethod(HttpMethodDirector.java:171)
    at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:397)
    at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:323)

做网页爬虫的时候，模拟 GET 请求访问网址，结果抛出了上面的异常。问了 3 个前辈，都说没遇到过，这可头疼死了。

一开始也不知道是什么问题，有人说可能是 buffer 太小，把大小设置大一点试试。花了一个晚上查网上资料，终于有点眉目，见参考网址：http://bbs.csdn.net/topics/390178589

/** *  */package com.http;import java.io.IOException;import org.apache.http.Header;import org.apache.http.HttpException;import org.apache.http.HttpResponse;import org.apache.http.HttpResponseFactory;import org.apache.http.HttpVersion;import org.apache.http.conn.ClientConnectionOperator;import org.apache.http.conn.OperatedClientConnection;import org.apache.http.conn.scheme.SchemeRegistry;import org.apache.http.impl.conn.BasicClientConnectionManager;import org.apache.http.impl.conn.DefaultClientConnection;import org.apache.http.impl.conn.DefaultClientConnectionOperator;import org.apache.http.impl.conn.DefaultHttpResponseParser;import org.apache.http.io.HttpMessageParser;import org.apache.http.io.SessionInputBuffer;import org.apache.http.message.BasicHeader;import org.apache.http.message.BasicHttpResponse;import org.apache.http.message.BasicLineParser;import org.apache.http.message.BasicStatusLine;import org.apache.http.message.LineParser;import org.apache.http.params.HttpParams;import org.apache.http.util.CharArrayBuffer;/** * @author yingzi * */public class MyBasicClientConnectionManager extends BasicClientConnectionManager {    public MyBasicClientConnectionManager() {        super();    }        @Override    protected ClientConnectionOperator createConnectionOperator( final SchemeRegistry sr) {        return new MyClientConnectionOperator(sr);    }            class MyClientConnection extends DefaultClientConnection {        @Override        protected HttpMessageParser createResponseParser(                final SessionInputBuffer buffer,                final HttpResponseFactory responseFactory,                final HttpParams params) {            return new MyDefaultHttpResponseParser(buffer, new MyLineParser(),                    responseFactory, params);        }    }        class MyDefaultHttpResponseParser extends DefaultHttpResponseParser {        public MyDefaultHttpResponseParser(SessionInputBuffer buffer,                LineParser parser, 
HttpResponseFactory responseFactory,                HttpParams params) {            super(buffer, parser, responseFactory, params);        }        @Override        protected HttpResponse parseHead(            final SessionInputBuffer sessionBuffer) throws IOException, HttpException {            try {                return super.parseHead(sessionBuffer);            } catch (Exception ex) {                // 压制ParseException异常                return new BasicHttpResponse(new BasicStatusLine(HttpVersion.HTTP_1_1, 200, ""));            }        }    }        class MyClientConnectionOperator extends DefaultClientConnectionOperator {        public MyClientConnectionOperator(final SchemeRegistry sr) {            super(sr);        }            @Override        public OperatedClientConnection createConnection() {            return new MyClientConnection();        }    }        class MyLineParser extends BasicLineParser {        @Override        public Header parseHeader(final CharArrayBuffer buffer) {            try {                return super.parseHeader(buffer);            } catch (Exception ex) {                // 压制ParseException异常                return new BasicHeader("invalid", buffer.toString());            }        }    }}
MyBasicClientConnectionManager

 

而我用的是 MultiThreadedHttpConnectionManager，上面的方案不怎么适用。于是我根据日志里的异常堆栈（stack trace），查看了 HttpClient 的 execute 方法的具体代码，大胆猜想了一下，觉得要把原来的 GetMethod 换成下面自定义的 MyHttpGetMethod：

/** *  */package com.http;import java.io.IOException;import org.apache.commons.httpclient.Header;import org.apache.commons.httpclient.HttpConnection;import org.apache.commons.httpclient.HttpException;import org.apache.commons.httpclient.HttpParser;import org.apache.commons.httpclient.HttpState;import org.slf4j.Logger;import org.slf4j.LoggerFactory;/** * @author yingzi * */public class MyHttpGetMethod extends org.apache.commons.httpclient.methods.GetMethod {    private static final Logger log = LoggerFactory.getLogger( MyHttpGetMethod.class );    public MyHttpGetMethod(){        super();    }        public MyHttpGetMethod(String url){        super(url);    }        @Override    protected void readResponseHeaders(HttpState state, HttpConnection conn)         throws IOException, HttpException {             getResponseHeaderGroup().clear();             Header[] headers = {new Header("Connection","Keep-Alive"), new Header("Content-Type","text/html; charset=GB18030"), new Header("Keep-Alive","timeout=20"), new Header("Cache-control","max-age=3600")};             try {                     headers = HttpParser.parseHeaders(                             conn.getResponseInputStream(), getParams().getHttpElementCharset());                } catch (Exception ex) {                    // 压制ParseException异常                    log.warn("response header has some error info , can not parse normally.");                }                          // Wire logging moved to HttpParser             getResponseHeaderGroup().setHeaders(headers);    }    }

 

httpClient如何接收格式错误的响应头部信息