C#获取HTML文本的第一张图片与截取内容摘要示例代码

				 
	  获取第一张图片  

	当我们获得的数据是一段HTML文本时，这段文本里也许包含许多图片，而我们需要截取其中一张作为标题图片（也就是主图），这时就可以用下面这个方法获取到第一张图片。

	  示例代码  

				 ? 

									 #region Extract image URLs
									 /// <summary>
									 /// Collects the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag found in an HTML fragment.
									 /// </summary>
									 /// <param name="html">HTML text to scan. Null or empty input yields an empty list.</param>
									 /// <returns>
									 /// An <see cref="ArrayList"/> of lower-cased <c>src</c> strings in the order the tags appear;
									 /// the first image of the fragment is element 0. The list is empty when nothing matches.
									 /// </returns>
									 public ArrayList getimgurl(string html)
									 {
									     ArrayList resultStr = new ArrayList();
									     if (string.IsNullOrEmpty(html))
									     {
									         return resultStr;
									     }

									     // Requires at least one character between "<img" and "src=", then accepts a
									     // single-quoted, double-quoted or bare attribute value (captured as "src").
									     Regex r = new Regex(@"<IMG[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>", RegexOptions.IgnoreCase);

									     foreach (Match m in r.Matches(html))
									     {
									         // NOTE(review): lower-casing is kept for compatibility with existing callers, but URL
									         // paths can be case-sensitive - confirm whether callers really need it.
									         resultStr.Add(m.Groups["src"].Value.ToLowerInvariant());
									     }

									     return resultStr;
									 }
									 #endregion

	  注意：  上面的方法返回的是一个 ArrayList  集合，其中包含了文本里所有 img 标签的 src；集合的第一个元素（下标为 0）就是第一张图片的地址，调用前请先判断集合的 Count 是否大于 0。

	  截取HTML文本  

	有时候我们得到的数据是一段HTML文本，需要截取其中的一部分作为内容摘要，此时可以使用下面这个方法。

	  示例代码  

				 ? 

									 #region News content summary
									 /// <summary>
									 /// Builds a summary of an HTML fragment that keeps at most <paramref name="length"/> text characters.
									 /// </summary>
									 /// <param name="content">HTML text to summarise. Null or empty input yields an empty string.</param>
									 /// <param name="length">Maximum number of text characters to keep; values below 1 yield an empty string.</param>
									 /// <param name="StripHTML">
									 /// When true, every tag and every ideographic or ordinary space is removed before truncating.
									 /// When false, markup is retained: opening paragraph tags are dropped, closing paragraph tags and
									 /// line breaks become <c>&lt;br/&gt;</c>, images and list/table tags are copied while the limit has
									 /// not been reached, and all other tags are always copied.
									 /// </param>
									 /// <returns>
									 /// The summary; "……" marks the point where text was cut. In markup mode, content whose raw
									 /// length does not exceed <paramref name="length"/> is returned unchanged.
									 /// </returns>
									 public string GetContentSummary(string content, int length, bool StripHTML)
									 {
									     if (string.IsNullOrEmpty(content) || length <= 0)
									     {
									         return "";
									     }

									     if (StripHTML)
									     {
									         string text = Regex.Replace(content, "<[^>]*>", "");
									         text = text.Replace("　", "").Replace(" ", "");
									         if (text.Length <= length)
									         {
									             return text;
									         }
									         return text.Substring(0, length) + "……";
									     }

									     if (content.Length <= length)
									     {
									         return content;
									     }

									     int pos = 0, size = 0;
									     bool firststop = false, notr = false, noli = false;
									     StringBuilder sb = new StringBuilder();

									     while (pos < content.Length)
									     {
									         char cur = content[pos];
									         if (cur != '<')
									         {
									             if (size < length)
									             {
									                 sb.Append(cur);
									                 size++;
									             }
									             else if (!firststop)
									             {
									                 // Emit the ellipsis only once, at the first dropped character.
									                 sb.Append("……");
									                 firststop = true;
									             }
									             pos++;
									             continue;
									         }

									         // Look at up to three characters after '<' to classify the tag; clamp so that a
									         // tag sitting at the very end of the content cannot run past the string.
									         int remaining = content.Length - pos - 1;
									         string next = content.Substring(pos + 1, remaining < 3 ? remaining : 3).ToLowerInvariant();

									         // End of the tag (exclusive). An unterminated '<' is consumed as a single character,
									         // and this is decided before any Substring call uses the span.
									         int npos = content.IndexOf('>', pos) + 1;
									         if (npos <= pos)
									         {
									             npos = pos + 1;
									         }
									         string tag = content.Substring(pos, npos - pos);
									         bool withinLimit = size < length;

									         if (SummaryTagStartsWith(next, "p") && !SummaryTagStartsWith(next, "pre"))
									         {
									             // Opening paragraph-like tag: dropped from the summary.
									         }
									         else if (SummaryTagStartsWith(next, "/p") && !SummaryTagStartsWith(next, "/pr"))
									         {
									             if (withinLimit)
									             {
									                 sb.Append("<br/>");
									             }
									         }
									         else if (SummaryTagStartsWith(next, "br"))
									         {
									             if (withinLimit)
									             {
									                 sb.Append("<br/>");
									             }
									         }
									         else if (SummaryTagStartsWith(next, "img"))
									         {
									             if (withinLimit)
									             {
									                 sb.Append(tag);
									                 // NOTE(review): the published listing reads "size = npos - pos 1" with its '+'
									                 // characters lost; accumulating (+=) is assumed to be the intent - confirm.
									                 size += npos - pos + 1;
									             }
									         }
									         else if (SummaryTagStartsWith(next, "li") || SummaryTagStartsWith(next, "/li"))
									         {
									             if (withinLimit)
									             {
									                 sb.Append(tag);
									             }
									             else if (!noli && SummaryTagStartsWith(next, "/li"))
									             {
									                 // Past the limit: copy only the first closing list-item tag.
									                 sb.Append(tag);
									                 noli = true;
									             }
									         }
									         else if (SummaryTagStartsWith(next, "tr") || SummaryTagStartsWith(next, "/tr"))
									         {
									             if (withinLimit)
									             {
									                 sb.Append(tag);
									             }
									             else if (!notr && SummaryTagStartsWith(next, "/tr"))
									             {
									                 // Past the limit: copy only the first closing row tag.
									                 sb.Append(tag);
									                 notr = true;
									             }
									         }
									         else if (SummaryTagStartsWith(next, "td") || SummaryTagStartsWith(next, "/td"))
									         {
									             // Cells keep being copied past the limit until a closing row tag has been emitted.
									             if (withinLimit || !notr)
									             {
									                 sb.Append(tag);
									             }
									         }
									         else
									         {
									             // Any other tag is copied regardless of the limit.
									             sb.Append(tag);
									         }

									         pos = npos;
									     }

									     return sb.ToString();
									 }

									 /// <summary>
									 /// Ordinal prefix test used to classify the lower-cased tag name sniffed by GetContentSummary.
									 /// </summary>
									 private static bool SummaryTagStartsWith(string value, string prefix)
									 {
									     return value.StartsWith(prefix, System.StringComparison.Ordinal);
									 }
									 #endregion

	  总结  

	以上就是利用  C#  获取一段HTML文本中的第一张图片和截取内容摘要的全部内容，希望本文的内容对大家学习或者使用C#能有所帮助，如果有疑问大家可以留言交流，谢谢大家对服务器之家的支持。

			 dy("nrwz"); 
			
查看更多关于C#获取HTML文本的第一张图片与截取内容摘要示例代码的详细内容...
声明：本文来自网络，不代表【好得很程序员自学网】立场，转载请注明出处：http://www.haodehen.cn/did56750
更新时间：2022-09-26 阅读：81次