php网络爬虫技术-PHP源码
php代码
function get_urls($url){
$url_array=array();
$the_first_content=file_get_contents($url);
$the_second_content=file_get_contents($url);
$pattern1 = "/http:\/\/[a-zA-Z0-9\.\?\/\-\=\&\:\+\-\_\'\"]+/";
$pattern2="/http:\/\/[a-zA-Z0-9\.]+/";
preg_match_all($pattern2, $the_second_content, $matches2);
preg_match_all($pattern1, $the_first_content, $matches1);
$new_array1=array_unique($matches1[0]);
$new_array2=array_unique($matches2[0]);
$final_array=array_merge($new_array1,$new_array2);
$final_array=array_unique($final_array);
for($i=0;$i
/php/34238.htmlwww.phpzy.comtrue/php/34238.htmlTechArticlephp网络爬虫技术-PHP源码 php代码 function get_urls($url){ $url_array=array(); $the_first_content=file_get_contents($url); $the_second_content=file_get_contents($url); $pattern1 = "/http:\/\/[a-zA-Z0-9\.\?\/\-\=\&\:\+\-\_\'\"]+/"; $pattern...
PHP之友评论