C#获取HTML文本的第一张图片与截取内容摘要示例代码

获取第一张图片

当我们拿到的数据是一段 HTML 文本时，这段文本里可能包含多张图片，而我们往往只需要取其中一张作为标题图片（也就是主图）。这时就可以用下面这个方法提取文本中的图片地址，再取其中的第一张。

示例代码

				?

									#region 获取第一张图片

									/// <summary>
									/// Collects the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag found in an HTML fragment.
									/// </summary>
									/// <param name="html">HTML text to scan. <c>null</c> or an empty string yields an empty list.</param>
									/// <returns>
									/// An <see cref="ArrayList"/> of <see cref="string"/> values in document order, so element 0 is the
									/// first image. The list is empty when no image is found and is never <c>null</c>.
									/// </returns>
									public ArrayList getimgurl(string html)
									{
									  ArrayList resultStr = new ArrayList();

									  // Regex.Matches throws on null input; "no HTML" simply means "no images".
									  if (string.IsNullOrEmpty(html))
									  {
									    return resultStr;
									  }

									  // Accepts single-quoted, double-quoted and unquoted src values.
									  // "\ssrc" requires whitespace before the attribute name so that attributes such as
									  // data-src are not mistaken for src; the lazy "[^>]*?" picks the first src in the tag.
									  const string imgSrcPattern =
									    @"<img\b[^>]*?\ssrc\s*=\s*(?:'(?<src>[^']*)'|""(?<src>[^""]*)""|(?<src>[^>\s]+))[^>]*>";

									  // The static Regex.Matches overload reuses the framework's regex cache across calls.
									  foreach (Match m in Regex.Matches(html, imgSrcPattern, RegexOptions.IgnoreCase))
									  {
									    // The value is kept exactly as written: URL paths can be case-sensitive,
									    // so lower-casing them may produce an address that no longer resolves.
									    string src = m.Groups["src"].Value;
									    if (src.Length > 0)
									    {
									      resultStr.Add(src);
									    }
									  }

									  return resultStr;
									}

									#endregion

注意：上面的方法返回的是一个 ArrayList 集合，其中包含了文本里所有 img 标签的 src。集合中的第一个元素（索引 0）就是第一张图片的地址；如果文本里没有图片，返回的集合为空。

截取HTML文本

有时候我们得到的数据是一段 HTML 文本，需要截取其中的一部分作为内容摘要。此时可以使用下面这个方法。

示例代码

				?

									#region 新闻内容摘要

									/// <summary>
									/// Builds a summary of news content limited to roughly <paramref name="length"/> characters.
									/// </summary>
									/// <param name="content">The source content, plain text or an HTML fragment.</param>
									/// <param name="length">
									/// Character budget for the summary. Zero or a negative value yields an empty string.
									/// </param>
									/// <param name="StripHTML">
									/// When <c>true</c>, every tag is removed, full-width and ordinary spaces are deleted, and the
									/// remaining text is cut to <paramref name="length"/> characters. When <c>false</c>, tags are kept
									/// (with the exceptions listed in the remarks) and only text outside tags counts toward the budget.
									/// </param>
									/// <returns>
									/// The summary, followed by "……" when text was cut off. An empty string is returned for
									/// <c>null</c>/empty content or a non-positive <paramref name="length"/>.
									/// </returns>
									/// <remarks>
									/// Tag handling when <paramref name="StripHTML"/> is <c>false</c>; tags are recognised by the first
									/// three characters after '&lt;', compared case-insensitively:
									/// <list type="bullet">
									/// <item>tags starting with <c>p</c> (but not <c>pre</c>) are dropped;</item>
									/// <item>closing <c>/p</c> tags and <c>br</c> tags become <c>&lt;br/&gt;</c> while budget remains;</item>
									/// <item><c>img</c> tags are copied while budget remains and consume budget;</item>
									/// <item><c>li</c>, <c>tr</c> and <c>td</c> tags are copied while budget remains; afterwards only the
									/// first closing <c>/li</c> and <c>/tr</c> are copied (<c>td</c> tags are copied until that
									/// <c>/tr</c> has been written);</item>
									/// <item>every other tag is always copied.</item>
									/// </list>
									/// A '&lt;' that has no closing '&gt;' is skipped.
									/// </remarks>
									public string GetContentSummary(string content, int length, bool StripHTML)
									{
									  // A negative length used to reach Substring and throw; treat it like zero.
									  if (string.IsNullOrEmpty(content) || length <= 0)
									  {
									    return "";
									  }

									  if (StripHTML)
									  {
									    // Remove every tag, then full-width (U+3000) and ordinary spaces, before measuring.
									    string text = Regex.Replace(content, "<[^>]*>", "");
									    text = text.Replace("\u3000", "").Replace(" ", "");
									    return text.Length <= length ? text : text.Substring(0, length) + "……";
									  }

									  if (content.Length <= length)
									  {
									    return content;
									  }

									  const System.StringComparison ordinal = System.StringComparison.Ordinal;

									  int pos = 0;             // index of the character currently examined
									  int size = 0;            // budget consumed so far
									  bool firststop = false;  // the "……" marker has been written
									  bool notr = false;       // the closing </tr> after the cut has been written
									  bool noli = false;       // the closing </li> after the cut has been written
									  StringBuilder sb = new StringBuilder();

									  while (pos < content.Length)
									  {
									    char cur = content[pos];

									    if (cur != '<')
									    {
									      // Ordinary text: copy while budget remains, then emit the ellipsis once.
									      if (size < length)
									      {
									        sb.Append(cur);
									        size++;
									      }
									      else if (!firststop)
									      {
									        sb.Append("……");
									        firststop = true;
									      }

									      pos++;
									      continue;
									    }

									    int close = content.IndexOf('>', pos);
									    if (close < 0)
									    {
									      // Unterminated tag: skip the stray '<' instead of computing a negative Substring length.
									      pos++;
									      continue;
									    }

									    int npos = close + 1;
									    string tag = content.Substring(pos, npos - pos);

									    // Up to three characters after '<' identify the tag; clamp so that a tag close to the
									    // end of the content cannot push Substring out of range.
									    int remaining = content.Length - pos - 1;
									    string next = content.Substring(pos + 1, remaining < 3 ? remaining : 3).ToLowerInvariant();
									    bool hasRoom = size < length;

									    if (next.StartsWith("p", ordinal) && !next.StartsWith("pre", ordinal))
									    {
									      // Opening paragraph-like tag: dropped; the matching close becomes a line break.
									    }
									    else if ((next.StartsWith("/p", ordinal) && !next.StartsWith("/pr", ordinal))
									             || next.StartsWith("br", ordinal))
									    {
									      if (hasRoom)
									      {
									        sb.Append("<br/>");
									      }
									    }
									    else if (next.StartsWith("img", ordinal))
									    {
									      if (hasRoom)
									      {
									        sb.Append(tag);
									        // NOTE(review): the published listing lost its '+' characters; accumulating is assumed
									        // here because a plain assignment would reset the budget and let the summary overrun.
									        size += tag.Length + 1;
									      }
									    }
									    else if (next.StartsWith("li", ordinal) || next.StartsWith("/li", ordinal))
									    {
									      if (hasRoom)
									      {
									        sb.Append(tag);
									      }
									      else if (!noli && next.StartsWith("/li", ordinal))
									      {
									        // Close the list item that was open when the budget ran out, once.
									        sb.Append(tag);
									        noli = true;
									      }
									    }
									    else if (next.StartsWith("tr", ordinal) || next.StartsWith("/tr", ordinal))
									    {
									      if (hasRoom)
									      {
									        sb.Append(tag);
									      }
									      else if (!notr && next.StartsWith("/tr", ordinal))
									      {
									        // Close the table row that was open when the budget ran out, once.
									        sb.Append(tag);
									        notr = true;
									      }
									    }
									    else if (next.StartsWith("td", ordinal) || next.StartsWith("/td", ordinal))
									    {
									      // Cells keep being copied until the cut-off row has been closed.
									      if (hasRoom || !notr)
									      {
									        sb.Append(tag);
									      }
									    }
									    else
									    {
									      // Any other tag is always copied so that elements opened earlier are still closed.
									      sb.Append(tag);
									    }

									    pos = npos;
									  }

									  return sb.ToString();
									}

									#endregion

总结

以上就是利用 C#获取一段HTML文本中的第一张图片和截取内容摘要的全部内容，希望本文的内容对大家学习或者使用C#能有所帮助，如果有疑问大家可以留言交流，谢谢大家对服务器之家的支持。

秒客网

C#获取HTML文本的第一张图片与截取内容摘要示例代码

相关文章