php截取html字符串自动补全html标签
本文总结如何利用 PHP 截取 HTML 字符串并自动补全 HTML 标签。实际开发中经常会碰到这种需求:很多人直接先用 strip_tags 过滤掉 HTML 标签,但这样就只剩下纯文本,可读性非常差。下面是一个函数,代码如下:
/**
 * Truncate an HTML fragment to a number of visible characters and close
 * every element that is still open at the cut point.
 *
 * Markup (tags, comments, doctype/processing instructions) is copied through
 * without being counted; only text between tags counts toward $length.
 * Whitespace-only text between tags is dropped, and a closing tag that does
 * not match the innermost open element is discarded.
 *
 * Known limitation: character entities such as "&amp;" are counted as
 * several characters and may be cut in the middle.
 *
 * @param string $html   HTML fragment to shorten
 * @param int    $length maximum number of visible characters to keep
 * @return string truncated HTML with balanced closing tags
 */
function subHtml($html, $length)
{
    // Elements that never take a closing tag, so they must not be stacked.
    $voidTags = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    $result = '';
    $tagStack = array();
    $len = 0;

    // Comments are matched first so that a '>' inside a comment does not
    // split it into a bogus tag plus stray text.
    $pieces = preg_split(
        '~(<!--.*?-->|<[^>]+?>)~s',
        (string) $html,
        -1,
        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
    );
    if (!is_array($pieces)) {
        return '';
    }

    foreach ($pieces as $piece) {
        if (trim($piece) === '') {
            continue;
        }

        // Comments, doctype and processing instructions: copy, do not count.
        if (preg_match('~^<[!?].*>$~s', $piece)) {
            $result .= $piece;
            continue;
        }

        // Closing tag: keep it only when it closes the innermost open element.
        if (preg_match('~^</([a-z][^\s/>]*)\s*>$~i', $piece, $match)) {
            $top = end($tagStack);
            if ($top !== false && strcasecmp($top, $match[1]) === 0) {
                array_pop($tagStack);
                $result .= $piece;
            }
            continue;
        }

        // Opening or self-closing tag. Attributes may contain '/', e.g. URLs.
        if (preg_match('~^<([a-z][^\s/>]*)[^>]*>$~i', $piece, $match)) {
            $selfClosing = substr($piece, -2) === '/>'
                || in_array(strtolower($match[1]), $voidTags, true);
            if (!$selfClosing) {
                $tagStack[] = $match[1];
            }
            $result .= $piece;
            continue;
        }

        // Plain text: the only thing that consumes the character budget.
        $remaining = $length - $len;
        $pieceLen = mstrlen($piece);
        if ($pieceLen < $remaining) {
            $result .= $piece;
            $len += $pieceLen;
            continue;
        }

        // Budget is used up by this piece; keep what fits and stop.
        $result .= msubstr($piece, 0, $remaining);
        break;
    }

    // Close whatever is still open, innermost element first.
    while (!empty($tagStack)) {
        $result .= '</' . array_pop($tagStack) . '>';
    }

    return $result;
}

/**
 * Cut a string to a maximum length without splitting a multi-byte character.
 *
 * For UTF-8, $length is a number of characters, determined from each lead
 * byte. For any other charset the input is treated as a single/double-byte
 * encoding and $length is a byte budget; a double-byte character that starts
 * inside the budget is kept whole.
 *
 * NOTE(review): $start is accepted for signature compatibility but is not
 * honoured; extraction always begins at the first byte, as in the original
 * implementation.
 *
 * @param string $string  input string
 * @param int    $start   unused, see note above
 * @param int    $length  maximum length to keep
 * @param string $dot     suffix appended only when something was cut off
 * @param string $charset 'UTF-8' (case-insensitive) selects the UTF-8 walk
 * @return string the shortened string
 */
function msubstr($string, $start, $length, $dot = '', $charset = 'UTF-8')
{
    $string = (string) $string;
    $length = (int) $length;
    $byteTotal = strlen($string);

    // A string can never hold more characters than bytes, so this is safe.
    if ($byteTotal === 0 || $byteTotal <= $length) {
        return $string;
    }
    if ($length <= 0) {
        return $dot;
    }

    $offset = 0;
    if (strtolower($charset) == 'utf-8') {
        for ($chars = 0; $chars < $length && $offset < $byteTotal; $chars++) {
            $lead = ord($string[$offset]);
            if ($lead >= 194 && $lead <= 223) {
                $offset += 2;
            } elseif ($lead >= 224 && $lead <= 239) {
                $offset += 3;
            } elseif ($lead >= 240 && $lead <= 247) {
                $offset += 4;
            } elseif ($lead >= 248 && $lead <= 251) {
                $offset += 5;
            } elseif ($lead == 252 || $lead == 253) {
                $offset += 6;
            } else {
                // ASCII, control bytes and stray continuation bytes.
                $offset += 1;
            }
        }
    } else {
        while ($offset < $length && $offset < $byteTotal) {
            $offset += ord($string[$offset]) > 127 ? 2 : 1;
        }
    }

    // A truncated trailing sequence must not push the offset past the end.
    $offset = min($offset, $byteTotal);
    $strcut = substr($string, 0, $offset);

    return $offset < $byteTotal ? $strcut . $dot : $strcut;
}

/**
 * Return the length of a string in characters rather than bytes.
 *
 * Uses mb_strlen when available, then iconv_strlen, and finally a regular
 * expression that counts well-formed UTF-8 sequences.
 *
 * @param string $str     input string
 * @param string $charset character encoding passed to mb_strlen/iconv_strlen
 * @return int number of characters
 */
function mstrlen($str, $charset = 'UTF-8')
{
    $str = (string) $str;

    if (function_exists('mb_strlen')) {
        return mb_strlen($str, $charset);
    }

    if (function_exists('iconv_strlen')) {
        $length = iconv_strlen($str, $charset);
        if ($length !== false) {
            return $length;
        }
    }

    preg_match_all(
        '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/',
        $str,
        $matches
    );

    return count($matches[0]);
}

// Example; the code is as follows:
/**
 * Cut an HTML string after a number of visible characters and append the
 * closing tags for every element still open at the cut point.
 *
 * Tags (including their attributes), comments and doctype/processing
 * instructions are skipped without being counted. Multi-byte characters are
 * sized from their UTF-8 lead byte. If no visible text would be removed, the
 * input is returned unchanged and no "..." is added.
 *
 * @param string $str  HTML to truncate
 * @param int    $num  number of visible characters to keep
 * @param bool   $more whether to append "..." after a truncated result
 * @return string the truncated HTML
 */
function phpos_chsubstr_ahtml($str, $num, $more = false)
{
    $str = (string) $str;
    $num = (int) $num;
    $byteLen = strlen($str);

    // The character count never exceeds the byte count: nothing to cut.
    if ($num >= $byteLen) {
        return $str;
    }

    // Elements that never take a closing tag.
    $voidTags = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    $openTags = array(); // names of currently open elements, outermost first
    $pos = 0;            // byte offset into $str
    $visible = 0;        // visible characters consumed so far

    while ($pos < $byteLen && $visible < $num) {
        if ($str[$pos] === '<') {
            // Skip a whole comment, even when it contains '>'.
            if (substr($str, $pos, 4) === '<!--') {
                $commentEnd = strpos($str, '-->', $pos + 4);
                $pos = $commentEnd === false ? $byteLen : $commentEnd + 3;
                continue;
            }

            $tagEnd = strpos($str, '>', $pos + 1);
            if ($tagEnd !== false) {
                $tag = substr($str, $pos, $tagEnd - $pos + 1);

                // Doctype / processing instruction: skip, nothing to balance.
                if ($tag[1] === '!' || $tag[1] === '?') {
                    $pos = $tagEnd + 1;
                    continue;
                }

                if (preg_match('~^<(/?)([a-z][^\s/>]*)~i', $tag, $m)) {
                    if ($m[1] === '/') {
                        // Close the nearest matching element and anything
                        // left open inside it.
                        for ($j = count($openTags) - 1; $j >= 0; $j--) {
                            if (strcasecmp($openTags[$j], $m[2]) === 0) {
                                array_splice($openTags, $j);
                                break;
                            }
                        }
                    } elseif (substr($tag, -2) !== '/>'
                        && !in_array(strtolower($m[2]), $voidTags, true)
                    ) {
                        $openTags[] = $m[2];
                    }
                    $pos = $tagEnd + 1;
                    continue;
                }
            }
            // Otherwise this is a literal '<' and counts as visible text.
        }

        // Advance over one character, sized from its UTF-8 lead byte.
        $lead = ord($str[$pos]);
        if ($lead >= 0xF0 && $lead <= 0xF7) {
            $pos += 4;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $pos += 3;
        } elseif ($lead >= 0xC0 && $lead <= 0xDF) {
            $pos += 2;
        } else {
            $pos += 1;
        }
        $visible++;
    }

    $pos = min($pos, $byteLen);

    // Only markup or whitespace remains after the cut: nothing was lost.
    if ($pos >= $byteLen || trim(strip_tags(substr($str, $pos))) === '') {
        return $str;
    }

    $re = substr($str, 0, $pos);
    for ($j = count($openTags) - 1; $j >= 0; $j--) {
        $re .= '</' . $openTags[$j] . '>';
    }
    if ($more) {
        $re .= '...';
    }

    return $re;
}

// Generating an article summary: a blog often needs to show only the first
// part of a post, and a careless cut can break paired tags and corrupt the
// whole document structure. The code is as follows:
function text_zhaiyao( $text , $length ){ //文章摘要生成函数 $test:内容 $length:摘要长度 global $Briefing_Length ; mb_regex_encoding( "UTF-8" ); if (mb_strlen( $text ) <= $length ) return $text ; $Foremost = mb_substr( $text , 0, $length ); $re = "<(/?) (P|DIV|H1|H2|H3|H4|H5|H6|ADDRESS|PRE|TABLE|TR|TD|TH|INPUT|SELECT|TEXTAREA|OBJECT|A|UL|OL|LI| BASE|META|LINK|HR|BR|PARAM|IMG|AREA|INPUT|SPAN)[^>]*(>?)"; $Single = "/BASE|META|LINK|HR|BR|PARAM|IMG|AREA|INPUT|BR/i" ; $Stack = array (); $posStack = array (); mb_ereg_search_init( $Foremost , $re , 'i' ); while ( $pos = mb_ereg_search_pos()){ $match = mb_ereg_search_getregs(); /* [Child-matching Formulation]: $matche[1] : A "/" charactor indicating whether current "<...>" Friction is Closing Part $matche[2] : Element Name. $matche[3] : Right > of a "<...>" Friction */ if ( $match [1]== "" ){ $Elem = $match [2]; if (mb_eregi( $Single , $Elem ) && $match [3] != "" ){ continue ; }查看更多关于php截取html字符串自动补全html标签 - php高级应用的详细内容...
声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://www.haodehen.cn/did30286