<?php
/**
 * Simple PHP "search engine" (single-page crawler) example.
 *
 * Requires the PHP cURL extension to be enabled. Given one URL it fetches the
 * page, collects basic information about it (meta tags and title) and
 * extracts the links found on the page.
 */
class Engine
{
    /** @var string URL of the page to inspect. */
    private $_url = '';

    /**
     * @var array Page information filled in by start(); keys written there
     *            are 'url', 'meta', 'title' and 'links'.
     *
     * Must start out as an array: since PHP 7.1 an empty string is no longer
     * converted to an array when a string key is assigned to it.
     */
    private $_sites = array();

    /**
     * @param string $url URL of the page to inspect.
     */
    public function __construct($url)
    {
        $this->_url = $url;
    }

    /**
     * Start the engine: fetch the page and record its URL, meta tags, title
     * and links in $this->_sites.
     *
     * The socket-based fetch and the body excerpt were disabled in the
     * original listing and are left disabled here.
     */
    public function start()
    {
        //$content = $this->socketOpen($this->_url);
        $content = $this->getContent($this->_url);

        $this->_sites['url']   = $this->_url;
        $this->_sites['meta']  = $this->getMeta($content);
        $this->_sites['title'] = $this->getTitle($content);
        //$this->_sites['detail'] = $this->getDetail($content);
        $this->_sites['links'] = $this->getLinks($content);
    }

    /**
     * Extract the <meta name="..." content="..."> tags of a page.
     *
     * get_meta_tags() only accepts a file name, so the markup is written to a
     * temporary file first. A unique temp file is used (and removed again)
     * instead of a fixed "metaCache" file in the working directory, which was
     * left behind after every call and could be overwritten by a concurrent
     * run.
     *
     * @param string $content HTML source of the page.
     * @return array Map of meta name => content; empty when nothing could be
     *               read.
     */
    public function getMeta($content)
    {
        $file = tempnam(sys_get_temp_dir(), 'meta');
        if ($file === false) {
            return array();
        }

        file_put_contents($file, $content);
        $meta = get_meta_tags($file);
        unlink($file);

        return $meta === false ? array() : $meta;
    }

    /**
     * Return the first 400 bytes of the page body after stripHTML().
     *
     * NOTE(review): stripHTML() is not part of the visible listing; it is
     * presumably defined in the truncated remainder of the class - confirm.
     * substr() counts bytes, so a multi-byte character may be cut at the end.
     *
     * @param string $content HTML source of the page.
     * @return string Body excerpt, or '' when the page has no <body> element.
     */
    public function getDetail($content)
    {
        // [^>]* accepts attributes on <body>; the "s" flag lets "." span the
        // line breaks that practically every real body contains.
        if (!preg_match('/<body[^>]*>(.*?)<\/body>/is', $content, $matchs)) {
            return '';
        }

        $body = $this->stripHTML($matchs[1]);

        return substr($body, 0, 400);
    }

    /**
     * Return the text of the page's first <title> element.
     *
     * @param string $content HTML source of the page.
     * @return string Title text, or '' when the page has no <title> element.
     */
    public function getTitle($content)
    {
        // Non-greedy so a later "</title>" cannot extend the capture; the "s"
        // flag also accepts titles that are wrapped over several lines.
        if (!preg_match('/<title[^>]*>(.*?)<\/title>/is', $content, $matchs)) {
            return '';
        }

        return $matchs[1];
    }

    /**
     * Collect the <a> links of a page.
     *
     * Only anchors whose href attribute is double-quoted are matched.
     *
     * NOTE(review): stripTags() is not part of the visible listing; it
     * receives the list of captured anchor contents - confirm its contract
     * against the full source.
     *
     * @param string $content HTML source of the page.
     * @return array 'href' => list of captured href values,
     *               'name' => result of stripTags() on the anchor contents.
     */
    public function getLinks($content)
    {
        $pat = '/<a[^>](.*?)href="(.*?)"(.*?)>(.*?)<\/a>/i';
        preg_match_all($pat, $content, $matchs);

        $result = array();
        $result['href'] = $matchs[2];
        $result['name'] = $this->stripTags($matchs[4]);

        return $result;
    }

    /**
     * Fetch "/" from a host over a plain socket on port 80.
     *
     * @param string $url Value passed to fsockopen() as the host and sent in
     *                    the Host header.
     * @return string|false Raw response (status line and headers included),
     *                      or false when the connection fails.
     */
    public function socketOpen($url)
    {
        $fp = fsockopen($url, 80, $errno, $errstr, 30);
        if ($fp === false) {
            echo "连接失败:$errstr($errno)<br/>";
            return false;
        }

        // The request line needs spaces between method, target and version,
        // and the header block must end with an empty line; otherwise the
        // server keeps waiting for the rest of the request.
        $out  = "GET / HTTP/1.1\r\n";
        $out .= "Host:$url\r\n";
        $out .= "Connection:Close\r\n";
        $out .= "\r\n";
        fwrite($fp, $out);

        $content = '';
        while (!feof($fp)) {
            $content .= fgets($fp, 1024);
        }
        fclose($fp);

        // The leftover debug "var_dump($content); exit;" was removed: it
        // terminated the script and made this return unreachable.
        return $content;
    }

    /**
     * Get the content of the given URL.
     *
     * NOTE(review): the listing is cut off right after the curl_init() call.
     * The rest of this method, and any members that followed it (stripHTML()
     * and stripTags() are called above but not visible), must be restored
     * from the complete source. The closing braces below were added only so
     * that the visible part of the class parses; until the body is restored
     * this method returns nothing.
     *
     * @param string $url URL to fetch.
     */
    public function getContent($url)
    {
        $ch = @curl_init($url);
    }
}

// Editor in charge: laiquliu (credit line of the article this listing was copied from).