好得很程序员自学网

<tfoot draggable='sEl'></tfoot>

php 问问采集代码 - php高级应用

php 问问采集代码

本文提供一款免费的 PHP 问问采集代码。如果你正想采集问问的内容,却不知道怎么写采集程序,可以参考下面这个经典的问问采集程序,代码如下:

<?php
/**
 * Wenwen (wenwen.soso.com) question/answer collector page.
 *
 * Each request handles at most ONE search result and then redirects to
 * itself (wenwen.php) through a JavaScript location change, carrying the
 * updated counters in the query string. Without a search term it falls
 * through to the HTML form at the bottom of this file.
 *
 * Request parameters:
 *   POST ask - search term typed into the form (starts a new run)
 *   GET  sp  - search term of a run in progress
 *   GET  pg  - search result page currently being processed
 *   GET  rs  - index of the result on that page (advances to the next page after 9)
 *   GET  jl  - running record counter (defaults to 1)
 *
 * Variables used below but defined elsewhere (presumably in the three
 * required files - TODO confirm): $table_prefix, $min_t1, $min_t2, $cj_word,
 * $same_title, $keyword, $art_author, $t_table, $t_art_title, $t_art_content,
 * $t_art_time, $t_artx_author, $t_catx_id, $cat_id.
 *
 * NOTE(review): the legacy mysql_* API is kept because the database link is
 * presumably opened in conn.php with that API. The extension was removed in
 * PHP 7; migrating to mysqli/PDO has to be done together with conn.php.
 */
session_start();
header("content-type:text/html;charset=utf-8");
require("stole_config.php");
require("conn.php");
require("keyword.php");

// Superglobal names are case-sensitive: $_POST / $_GET, never $_post / $_get.
if (!empty($_POST['ask'])) {
    // Search term submitted through the form.
    $sp = "s" . urlencode(trim($_POST['ask']));
} else {
    $sp = urlencode(isset($_GET['sp']) ? $_GET['sp'] : '');
}

// Counters are cast to int so they can be echoed into HTML/JS safely.
$jl = empty($_GET['jl']) ? 1 : intval($_GET['jl']);
$pg = isset($_GET['pg']) ? intval($_GET['pg']) : 0; // search result page
$rs = isset($_GET['rs']) ? intval($_GET['rs']) : 0; // result index on that page

if ($rs > 9) {
    // Past the last result index of this page: continue with the next page.
    $rs = 0;
    $pg++;
}

if ($pg > 51) {
    echo "采集完毕! 总共采集 " . htmlspecialchars(urldecode($sp), ENT_QUOTES, 'UTF-8') . " " . $jl . "条记录";
    exit();
}

if ($sp) { // only collect when a search term is present
    $t = '';
    $str = @file_get_contents("http://wenwen.soso.com/z/search.e?sp={$sp}&pg={$pg}");

    // Lazy quantifiers keep each capture inside its own tag; a greedy ".*"
    // combined with the "s" flag would run to the LAST closing tag on the page.
    if ($str !== false
        && preg_match('/<ol class="result_list">(.*?)<\/ol>/is', $str, $asklist)           // result list
        && preg_match_all('/<a target="_blank" href="\/z\/(q.*?\.htm)/is', $asklist[1], $urllist) // question links
        && isset($urllist[1][$rs])
    ) {
        $t = $urllist[1][$rs];
    }

    if ($t === '') {
        // Fetch failed, markup changed, or this page has fewer results.
        echo "没有可采集的问题";
    } else {
        // Source id stored with every collected article; used to detect duplicates.
        $suid = "ww{$t}";

        $res = mysql_query(
            "select count(*) from {$table_prefix}c_article where suid='" . mysql_real_escape_string($suid) . "' "
        );
        $row = $res ? mysql_fetch_array($res) : false;
        $sct = $row ? intval($row[0]) : 0;

        if ($sct == 0) {
            $html = @file_get_contents("http://wenwen.soso.com/z/{$t}");
            if ($html === false) {
                $html = '';
            }
            $html = str_replace(array("<pre>", "</pre>"), "", $html);
            $html = str_replace("<br/><br/><br/>", "<br/><br/>", $html);

            $art_title = '';
            if (preg_match('/<div class="question_main">.*?<h3>(.*?)<\/h3>/is', $html, $ask_title)) {
                $art_title = $ask_title[1];
            }

            // Only the first answer block is taken (preg_match yields one match),
            // and only when it is longer than $min_t1 bytes.
            $art_content = '';
            if (preg_match('/<div class="answer_con">(.*?)<\/div>/is', $html, $answer)
                && strlen($answer[1]) > $min_t1
            ) {
                $art_content = trim($answer[1]);
            }
            // Unwrap links: keep the link text, drop the <a> tag itself.
            $art_content = trim(preg_replace('/<a [^>]*>(.*?)<\/a>/is', '$1', $art_content));

            // The title must contain at least one of the configured keywords.
            // $cj_word is converted from GBK so it can be compared with the UTF-8 title.
            $word_allow = false;
            $word_arr = explode(",", (string) iconv("gbk", "utf-8", $cj_word));
            foreach ($word_arr as $word) {
                // Empty entries are skipped: an empty needle is an error, not a match.
                if ($word !== '' && strpos($art_title, $word) !== false) {
                    $word_allow = true;
                    break;
                }
            }

            if (!$word_allow) {
                echo "主题不符合要求";
            } elseif (strlen($art_content) <= $min_t2) {
                echo "长度不够";
            } else {
                echo "<font color=red>添加中............................</font><br>";
                // The title is remote content: escape it for display.
                echo htmlspecialchars($art_title, ENT_QUOTES, 'UTF-8', false) . "<br>";

                // The database side is written in GBK.
                $art_title = iconv('utf-8', 'gbk', $art_title);

                // Append a suffix when an article with the same title already exists.
                $res = mysql_query(
                    "select count(*) from {$table_prefix}c_article where art_title ='"
                    . mysql_real_escape_string($art_title) . "' "
                );
                $row = $res ? mysql_fetch_array($res) : false;
                $title_ct = $row ? intval($row[0]) : 0;
                if ($title_ct > 0) {
                    $art_title .= "{$same_title}{$title_ct}";
                }

                // NOTE(review): every space becomes <br>; this also hits spaces inside
                // any remaining tags. The search string may have been altered when the
                // snippet was published - confirm the intended separator.
                $art_content = iconv('utf-8', 'gbk', str_replace(" ", "<br>", $art_content));
                $art_content = strtr($art_content, $keyword);
                $art_time = date("Y-m-d"); // four-digit year

                // All values are scraped or configured text: escape before building SQL.
                $q_title   = mysql_real_escape_string($art_title);
                $q_content = mysql_real_escape_string($art_content);
                $q_author  = mysql_real_escape_string($art_author);
                $q_suid    = mysql_real_escape_string($suid);

                // Collection log table.
                $sql = "insert into {$table_prefix}c_article(art_title,art_content,art_time,art_author,suid) "
                    . "values('{$q_title}','{$q_content}','{$art_time}','{$q_author}','{$q_suid}')";
                $ok_log = mysql_query($sql);

                // Target article table, with or without a category column.
                if (empty($t_catx_id)) {
                    $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author}) "
                        . "values('{$q_title}','{$q_content}','{$art_time}','{$q_author}')";
                } else {
                    $q_cat = mysql_real_escape_string($cat_id);
                    $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author},{$t_catx_id}) "
                        . "values('{$q_title}','{$q_content}','{$art_time}','{$q_author}','{$q_cat}')";
                }
                $ok_article = mysql_query($sql2);

                if ($ok_log && $ok_article) {
                    $jl++; // count only records that were actually stored
                } else {
                    echo "写入数据库失败";
                }
            }
        } else {
            echo "已经存在";
        }
    }

    $rs++;

    // Record the progress of this run.
    $f_tt = urldecode($sp) . "--页数" . $pg . " 记录数 " . $jl;
    file_put_contents("ss.txt", $f_tt);

    // $sp is url-encoded and the counters are integers, so the URL is safe
    // inside the single-quoted JavaScript string.
    echo "<script>location.href='wenwen.php?jl=" . $jl . "&sp=" . $sp . "&pg=" . $pg . "&rs=" . $rs . "';</script>";
    exit();
}
?>
<link href="style.css" rel="stylesheet" type="text/css" />
<table width="700" border="0" align="center" cellspacing="1" bgcolor="#cccccc">
  <tr>
    <td height="50" align="center" bgcolor="#00cc00"><h1><a href="http://www.111cn.net">荐礼啦</a>知道问问采集插件</h1></td>
  </tr>
</table>
<table width="700" border="0" align="center" cellspacing="1" bgcolor="#cccccc" style="margin-top:6px; margin-bottom:6px;">
  <tr>
    <td height="30" align="center" bgcolor="#ffffff"><a href="cj_config.php">采集设置</a> <a href="uninstall.php" onclick="return confirm('您确定要卸载采集插件吗');">卸载采集</a> <a href="cj_view.php">查看采集记录</a> <a href="cj_help.php">采集帮助</a> <a href="baidu.php" target="_blank">知道采集</a> <a href="wenwen.php" target="_blank">问问采集</a></td>
  </tr>
</table>
<form action="wenwen.php" method="post">
  <table width="628" height="49" border="0" align="center">
    <tr>
      <td width="413" align="right"><input name="ask" type="text" id="ask" size="50"></td>
      <td width="205"><input type="submit" name="button" id="button" value="问问采集" style=" padding-left:15px; padding-right:15px; height:25px; line-height:25px;"></td>
    </tr>
  </table>
</form>

查看更多关于php 问问采集代码 - php高级应用的详细内容...

  阅读:57次