首页 > 代码库 > PHP 模拟抓取页面的一些函数整理

PHP 模拟抓取页面的一些函数整理

curl 模拟提交post数据

/*$url @ 提交地址$postData 例$postData = http://www.mamicode.com/array("";	foreach ($postData as $k=>$v){		$o.= "$k=".urlencode($v)."&";	}	$postData=http://www.mamicode.com/substr($o,0,-1);>

 

CURL 抓取页面

/**
 * Fetch a page with cURL and hand back whatever curl_exec() produced.
 *
 * @param string $url Address of the page to fetch.
 * @return string|bool The value returned by curl_exec(): the response body
 *                     (CURLOPT_RETURNTRANSFER is enabled), or false on failure.
 */
function getcontents($url)
{
    // Limit on how long cURL may spend establishing the connection.
    $connectTimeout = 5;

    $handle = curl_init();
    curl_setopt_array($handle, [
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => $connectTimeout,
    ]);

    return curl_exec($handle);
}

 

正则抓取 Table 内容

 

/**
 * Turn the HTML of a table into a two-dimensional array of cell texts.
 *
 * Opening <table>/<tr>/<td> tags are removed, every closing </tr> and </td>
 * is swapped for a "{tr}" / "{td}" marker, all remaining tags and whitespace
 * are stripped, and the text is then split on the markers.
 *
 * @param string $table HTML fetched from a page.
 * @return array One entry per closed row, each a list of that row's cell
 *               strings; an empty array when the input has no closed row.
 */
function get_td_array($table)
{
    $table = (string) $table;

    // Remove the opening table/row/cell tags together with their attributes.
    $table = preg_replace(
        ['~<table[^>]*?>~si', '~<tr[^>]*?>~si', '~<td[^>]*?>~si'],
        '',
        $table
    );

    // Mark row and cell boundaries. Case-insensitive so that </TR> and </TD>
    // are handled the same way as the opening tags matched above.
    $table = str_ireplace(['</tr>', '</td>'], ['{tr}', '{td}'], $table);

    // Remove every remaining HTML tag.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);

    // Remove a line break together with the whitespace run that follows it,
    // then every remaining plain space.
    // NOTE(review): the original called str_replace(" ", "") twice; one of
    // the two may originally have targeted "&nbsp;" -- confirm against the
    // upstream snippet before adding it.
    $table = preg_replace('~([\r\n])[\s]+~', '', $table);
    $table = str_replace(' ', '', $table);

    // Text after the last "{tr}" marker is not a complete row: discard it.
    $rows = explode('{tr}', $table);
    array_pop($rows);

    $td_array = [];
    foreach ($rows as $row) {
        // Likewise, text after the last "{td}" marker is not a complete cell.
        $cells = explode('{td}', $row);
        array_pop($cells);
        $td_array[] = $cells;
    }

    return $td_array;
}

 

PHP 模拟抓取页面的一些函数整理