提取 Google、Baidu 等搜索引擎的网页内容 - PHP 源码
在本地运行没有问题,测试结果见附图。
1. [代码][PHP]代码
Search test
'); //echo $start.'start
'; $end=strpos($contents,''; $inf=substr($contents,$start,$len); //过滤掉google伪链接,替换搜索链接,尾部链接 $inf=str_replace('/url?q=','',$inf); $inf=str_replace('search?q=','index.php?engine=google&keywords=',$inf); //过滤google js带来的无用后缀 //$pattern='&sa=([a-z0-9_\-]){1}&ei=([a-z0-9_\-]){18,22}&ved=([a-z0-9_\-]){9,13}&usg=([a-z0-9_\-]){34}'; $pattern2='&sa=([a-z0-9_\-]){1}&ei=([a-z0-9_\-]){22}&ved=([a-z0-9_\-]){9,13}&usg=([a-z0-9_\-]){34}'; //test regular expression below $inf=eregi_replace($pattern2,' ',$inf); //$write='test writing ability'; //file_put_contents('/home/mageia/search_test',$inf); //替换类似内容链接无法打开链接 $inf=str_replace('/search?','http://www.google.com/search?',$inf); //替换google对链接的转换 $inf=urldecode($inf); echo $inf; $guid='1 2 3 4 5 6 7 8 9 10 11 12
'; //$currentpage当前所在页 $currentpage=$pn/10+1; $currentpos=strpos($guid,$currentpage.'<'); //$guid为最低下的导航页内容 $guid=substr_replace($guid,'O',$currentpos,($currentpage<10)?1:2); echo $guid; } elseif($engine=='baidu'){ header("Content-type:text/html;charset=utf-8");//加个头 require('baidu.sty'); $url='http://www.baidu.com/s?wd='.$keywords.'&pn='.$pn; //please check the url //echo $url; //$contents=file_get_contents($url); //try with curl $contents=get_data($url); //echo $contents; $start=strpos($contents,''); //echo $start.'
'; //real start position $start=strpos($contents,'',$start); //real end position $end=strpos($contents,'',$start); $len=$end-$start; //echo $end.'
'; $inf=substr($contents,$start,$len); //替换一些失效链接 $inf=str_replace('/s?wd=','index.php?engine=baidu&keywords=',$inf); echo $inf; $guid='1 2 3 4 5 6 7 8 9 10
'; //$currentpage当前所在页 $currentpage=$pn/10+1; $currentpos=strpos($guid,$currentpage.'<'); //$guid为最低下的导航页内容 $guid=substr_replace($guid,'O',$currentpos,($currentpage<10)?1:2); echo $guid; } elseif($engine=='php'){ /*it will only search php functions within https://www.php1.cn/ . If the function name you *give does not exist, it returns nothing but a empty page. */ require('php_print.css'); require('php_site.css'); require('php_mirror.css'); $url='http://php.net/search.php?&pattern='.$keywords.'&show=quickref'; //try with file_get_contents,failed remote in us server $phpcontents=file_get_contents($url); //try with curl,failed local //$phpcontents=get_data($url); //echo $phpcontents; $start=strpos($phpcontents,''; //actual start position $start+=8; //echo $start.'
'; $start=strpos($phpcontents,''); $phpinf=substr($phpcontents,$start,$end-$start); echo $phpinf; } elseif(($engine=='mysql')||($engine=='java')){ header("Content-type:text/html;charset=utf-8");//加个头 require('search-core.sty'); require('search.sty'); require('results.sty'); require('grids-ses.sty'); require('tabview-ses.sty'); require('treeview-ses.sty'); //enable page links to work $pgstart=$pn+1; $pgend=$start+9; //url中不能有回车符 if($engine=='mysql'){ $url='http://search.oracle.com/search/search?&q='.$keywords.'&group=MySQL&search_startnum='.$pgstart.'&search_endnum='.$pgend.'&num=10';} else{ $url='http://search.oracle.com/search/search?search_p_main_operator=all&group=Documentation&q='.$keywords.'+url:/javase'.'&search_startnum='.$pgstart.'&search_endnum='.$pgend.'&num=10'; } //echo $url; //$mysqlcontents=file_get_contents($url); //try with curl $mysqlcontents=get_data($url); $start=strpos($mysqlcontents,'
'); $end=strpos($mysqlcontents,'
'); $mysqlinf=substr($mysqlcontents,$start,$end-$start); echo $mysqlinf; if($engine=='mysql'){ $guid='
1 2 3 4 5
'; }else{ $guid='1 2 3 4 5
';
}
// Page number derived from the result offset $pn (offset / 10, plus one).
$currentpage=$pn/10+1;
$currentpos=strpos($guid,$currentpage.'<');
// $guid holds the pager shown at the bottom of the results; the current
// page number is replaced with 'O'. strpos() returns false when the page
// number is not present in the pager, in which case the pager is left
// untouched instead of having its first character overwritten.
if($currentpos!==false){
    $guid=substr_replace($guid,'O',$currentpos,($currentpage<10)?1:2);
}
echo $guid;
}
else{
    echo 'Sorry,This search engine will be supported soon.';
}

/**
 * Gets the data from a URL using cURL.
 *
 * @param string $url URL to request.
 * @return string|false The response body, or false when the cURL handle
 *                      cannot be created or the transfer fails.
 */
function get_data($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    // Seconds allowed for establishing the connection.
    $timeout = 5;
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => $timeout,
        // Bound the whole transfer as well, so a server that accepts the
        // connection but never finishes responding cannot hang the page.
        CURLOPT_TIMEOUT        => $timeout * 3,
    ));

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
?>
2. [图片] 屏幕截图.png
3. [图片] 屏幕截图.png
4. [图片] 屏幕截图.png
5. [图片] 屏幕截图.png
6. [图片] 屏幕截图.png
7. [图片] 屏幕截图.png
PHP之友评论