首页 > 代码库 > Delphi XE下获取网页源码记录

Delphi XE下获取网页源码记录

存放个自己写的获取网页源码,掌握了:

1.利用CreateOLEObject方式获取源码

2.自动判断网页格式编码

需要使用到的单元:Winapi.ActiveX,System.Win.ComObj,System.WideStrUtils

需要创建结构体:TResultWebHtml (用于存放返回的源码和Cookies)

 
Uses Winapi.ActiveX,System.Win.ComObj,System.WideStrUtils;type  TResultWebHtml = record    Html : String;    Cookie : String;  end;function GetWebHtml(Url,Method,Code,ReferText,ReferCookies:String;Overtime:Integer;Referer,Accept,Language,Charset,Agent,ContentType:String;Redirect,Encoding,XRequestedWith:BooLean):TResultWebHtml;stdcall;//访问网页Var  I:Integer;  XMLHTTP:Olevariant;  ResultWebHtml:TResultWebHtml;  POvertime:Integer;  PUrl,PMethod,PCode,PReferText,PReferCookies:String;  PReferer,PAccept,PLanguage,PCharset,PAgent,PContentType:String;  PHtml: RawByteString;  Temp,PGetCookies:String;  TempList:TStringList;  HTML:TBytes;Begin  try    CoInitialize(nil); //添加 CoInitialize 支持多线程调用    XMLHTTP:= CreateOLEObject(WinHttp.WinHttpRequest.5.1);    Try //避免超时报错、防止出错等      //初始化默认值      if Url = ‘‘ then Exit else PUrl:=Url; //网址初始化      if Method = ‘‘ then PMethod:= get else PMethod:= LowerCase(Method); //初始化访问方式      if Code <> ‘‘ then PCode:= LowerCase(Code);      //初始化网站编码,为空自动选择      if ReferText <> ‘‘ then PReferText:=ReferText;//设置提交信息,用于Post操作      if ReferCookies <> ‘‘ then PReferCookies:=ReferCookies;//设置网站Cookies      if Overtime = 0 then POvertime:=15000 Else POvertime:=Overtime * 1000; //设置超时时间      if Referer = ‘‘ then PReferer:= PUrl else PReferer:= Referer; //设置来路,如果为空则设置为访问网站地址      if Accept = ‘‘ then PAccept:=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 else PAccept:=Accept;      if Language = ‘‘ then PLanguage:=zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4 else PLanguage:=Language;      if Charset = ‘‘ then PCharset:=GBK,utf-8;q=0.7,*;q=0.3 else PCharset:=Charset;      if Agent = ‘‘ then PAgent:=Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2107.3 Safari/537.36 else PAgent:=Agent;      if ContentType = ‘‘ then PContentType:=application/x-www-form-urlencoded else PContentType:=ContentType;      XMLHTTP.open(PMethod, PUrl, False);      //设置头信息      XMLHTTP.setRequestHeader(Referer, PReferer);  // 仅服务器可知 来路      XMLHTTP.setRequestHeader(Accept, PAccept);      XMLHTTP.setRequestHeader(Accept-Language, PLanguage);      //XMLHTTP.setRequestHeader(‘Accept-Charset‘, PCharset);      XMLHTTP.setRequestHeader(User-Agent, PAgent);      XMLHTTP.setRequestHeader(Content-Type, PContentType);      XMLHTTP.setRequestHeader(Connection, keep-alive);      if PReferCookies <> ‘‘ then XMLHTTP.setRequestHeader(Cookie, PReferCookies);      if Encoding then XMLHTTP.setRequestHeader(Accept-Encoding, gzip); //设置压缩 ‘gzip‘      if Redirect then XMLHTTP.Option(6) :=True else XMLHTTP.Option(6) :=False;//设置是否支持转跳      if XRequestedWith then XMLHTTP.setRequestHeader(X-Requested-With, XMLHttpRequest);      XMLHTTP.setTimeouts(POvertime, POvertime, POvertime, POvertime);      XMLHTTP.send(PReferText);      HTML:=XMLHTTP.responseBody;      if PCode = ‘‘ then begin //开启自动选择        //自动判断网页格式后        SetString(PHtml, PAnsiChar(Pointer(WideString(XMLHTTP.ResponseBody))), SysStringByteLen(PWideChar(WideString(XMLHTTP.ResponseBody))));        if IsUTF8String(PHtml) then Temp := TEncoding.Default.GetEncoding(65001).GetString(HTML) else Temp :=TEncoding.Default.GetString(HTML);      end else begin        if PCode = utf-8 then Temp := TEncoding.Default.GetEncoding(65001).GetString(HTML) else Temp :=TEncoding.Default.GetString(HTML);      end;      ResultWebHtml.Html:=Temp;      //获取返回COOKIES      Temp:=XMLHTTP.GetallResponseHeaders;      TempList:=TStringList.Create;      TempList.Text :=Temp;      for I := 0 to TempList.Count -1 do begin        if Pos(Set-Cookie:,TempList[I]) <> 0 then begin          Temp:=Copy(TempList[I],12,Length(TempList[I]));          Temp:=Copy(Temp,1,Pos(;,Temp));          if Temp <> ‘‘ then PGetCookies:=PGetCookies+Temp;        end;      end;      TempList.Free;      ResultWebHtml.Cookie :=PGetCookies;      Result:=ResultWebHtml;    except    End;  finally    XMLHTTP := Unassigned;    CoUnInitialize;  end;End;procedure TForm1.Button1Click(Sender: TObject);begin  Memo1.Text:=GetWebHtml(http://www.cnblogs.com/sishen,Get,‘‘,‘‘,‘‘,30,‘‘,‘‘,‘‘,‘‘,‘‘,‘‘,True,False,False).Html;end;

 

Delphi XE下获取网页源码记录