C#提取HTML中的纯文本(含汉字)

时间:2021-09-30 15:59:01
using System.Text.RegularExpressions;      

// Matches one markup tag: "<", at least one character, then the nearest ">".
// Singleline makes "." match '\n' as well, so a tag whose attributes wrap onto
// several lines is removed as a whole instead of leaking its name/attributes.
// Held in a static field so the pattern is parsed once, not on every call.
private static readonly Regex s_htmlTagRegex = new Regex("<.+?>", RegexOptions.Singleline);

/// <summary>
/// Extracts the plain text from an HTML fragment by deleting its tags.
/// </summary>
/// <param name="strHtml">HTML text to strip; may be null or empty.</param>
/// <returns>
/// The input with every <c>&lt;...&gt;</c> span removed, any leftover
/// <c>&lt;</c> / <c>&gt;</c> characters removed, and every <c>&amp;nbsp;</c>
/// entity removed. Returns an empty string for null or empty input.
/// Other HTML entities are left untouched.
/// </returns>
private string StripHT(string strHtml)
{
    // Nothing to strip; also avoids the ArgumentNullException Regex.Replace throws on null.
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Drop everything between "<" and ">" (inclusive).
    string strOutput = s_htmlTagRegex.Replace(strHtml, "");

    // Remove stray brackets that were not part of a complete tag.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");

    // Remove non-breaking-space entities rather than converting them to spaces.
    strOutput = strOutput.Replace("&nbsp;", "");
    return strOutput;
}