phpQuery ------实战应用thinkphp框架
相信许多朋友看小说时都有一种感觉:总有一些广告页面弹出来。加上最近在研究用phpQuery进行数据采集,所以萌生了写一个小说网站玩的想法。经过大概4天的时间终于完成了,主要运用了thinkphp框架、MongoDb和phpQuery。
如下是一个分页采集的例子:
/**
 * Scrape one page of search results and upsert each listed book into the `book` model.
 *
 * The page HTML is fetched with Baidu::searchPage(), parsed with phpQuery, and every
 * `#content tr` row that carries a title link is turned into a book record. A record
 * whose `bookId` already exists is updated (keeping its `_id` and `create` values);
 * otherwise a new record is added.
 *
 * @param string $searchkey Search keyword, forwarded to Baidu::searchPage().
 * @param int    $page      Result page number, forwarded to Baidu::searchPage().
 *
 * @return mixed HTML content of the `#pagelink .last` element
 *               (presumably the last page number - confirm against the target site's markup).
 */
public function pageHtml($searchkey, $page = 1)
{
    Vendor('phpQuery.phpQuery.phpQuery');

    $doc = \phpQuery::newDocumentHTML(Baidu::searchPage($searchkey, $page));
    $count = $doc->find('#pagelink .last')->html();

    foreach ($doc->find('#content tr') as $tr) {
        $row = pq($tr);
        $titleLink = $row->find('td:eq(0) a');

        // Start every row from an empty record so that `_id` / `create` copied from a
        // previously updated book can never leak into the add() of a new one.
        $book = array();
        $book['href'] = $titleLink->attr('href');

        // Rows without a title link (e.g. the table header) are not books.
        if (!$book['href']) {
            continue;
        }

        $book['title'] = Baidu::utf8($titleLink->html());

        // end() takes its argument by reference, so the exploded path must live in a variable.
        $hrefParts = explode('/', $book['href']);
        $book['bookId'] = intval(end($hrefParts));

        // The latest-chapter URL is rebuilt from path segments 3..5 of the chapter link.
        $chapterLink = $row->find('td:eq(1) a');
        $chapterParts = explode('/', (string) $chapterLink->attr('href'));
        $book['newZj'] = isset($chapterParts[5])
            ? Baidu::DING_DIAN . $chapterParts[3] . '/' . $chapterParts[4] . '/' . $chapterParts[5] . '.html'
            : '';

        $book['newName'] = Baidu::utf8($chapterLink->html());
        $book['author'] = Baidu::utf8($row->find('td:eq(2)')->html());
        $book['ziNum'] = Baidu::utf8($row->find('td:eq(3)')->html());
        $book['lastDate'] = Baidu::utf8($row->find('td:eq(4)')->html());
        $book['status'] = Baidu::utf8($row->find('td:eq(5)')->html());

        $existing = D('book')->where(array('bookId' => $book['bookId']))->find();
        $book['update'] = time();

        if ($existing) {
            // Keep the stored identity and creation time; refresh everything else.
            $book['_id'] = $existing['_id'];
            $book['create'] = $existing['create'];
            D('book')->create($book);
            D('book')->save();
        } else {
            $book['create'] = time();
            D('book')->add($book);
        }
    }

    // Reset phpQuery's static document list so parsed pages do not pile up across calls.
    \phpQuery::$documents = array();

    return $count;
}
由于用的是mongoDb所以字符串要用utf-8的编码多有在Think\BaiduModel中做了一个转码如下:
/**
 * Convert a string to UTF-8.
 *
 * The source encoding is detected among ASCII, UTF-8, GB2312, GBK and BIG5. The value
 * is transcoded only when a non-UTF-8 encoding is positively detected; if detection
 * fails the string is returned unchanged instead of being passed to
 * mb_convert_encoding() with an invalid source encoding.
 *
 * The parameter is intentionally left without a type declaration: `String` names a
 * class on PHP 5, and on PHP 7+ a scalar `string` declaration rejects null before the
 * is_string() guard below can run.
 *
 * @param mixed $string Value to convert.
 *
 * @return string The UTF-8 text, or the literal 'this is not string' for non-string input.
 */
public static function utf8($string)
{
    if (!is_string($string)) {
        return 'this is not string';
    }

    // Strict detection: only report an encoding the bytes are actually valid in.
    $encoding = mb_detect_encoding($string, array('ASCII', 'UTF-8', 'GB2312', 'GBK', 'BIG5'), true);

    // Transcode only when a concrete non-UTF-8 encoding was found.
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $string = mb_convert_encoding($string, 'UTF-8', $encoding);
    }

    return $string;
}
phpquery会占用过多内存的解决方法http://www.neatstudio.com/archives/?article-1928.html
查看更多关于phpQuery ------实战应用thinkphp框架 - Thinkphp的详细内容...
声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://www.haodehen.cn/did6320