公司的网站首页会显示新闻,要求把新闻的标题按半角字符算半个字符,全角字符算一个字符来截短。
想了两种方法，不过觉得效率都比较一般，苦于找不到更好的方法。如果有朋友有更好的方法，请指教。
第一种是用正则表达式:
/// <summary>
/// Truncates a headline to a maximum display width, counting a full-width character
/// (CJK ideograph or one of the listed full-width punctuation marks) as one unit and
/// every other character as half a unit.
/// </summary>
/// <param name="pOriginalNews">Value whose <c>ToString()</c> text is truncated; <c>null</c> yields an empty string.</param>
/// <param name="len">Maximum width in full-width units; zero or a negative value yields an empty string.</param>
/// <returns>The longest prefix of the text whose width does not exceed <paramref name="len"/>.</returns>
public string LimitLengthString( object pOriginalNews, int len)
{
    if (pOriginalNews == null)
    {
        return string.Empty;
    }

    string originalNews = pOriginalNews.ToString();
    if (originalNews == null || originalNews.Length == 0)
    {
        return string.Empty;
    }

    // Matches every character that is NOT full-width. The excluded (full-width) set is:
    // CJK ideographs U+4E00-U+9FA5, curly quotes U+201C/U+201D, full-width < > ( ) : ? !
    // (U+FF1C U+FF1E U+FF08 U+FF09 U+FF1A U+FF1F U+FF01), em dash U+2014 and the book-title
    // marks U+300A/U+300B. Escapes are used so the pattern survives re-encoding of the file;
    // the ASCII forms of the punctuation are deliberately not listed, so they count as half-width.
    Regex regx = new Regex(@"[^\u4E00-\u9FA5\u201C\u201D\uFF1C\uFF1E\uFF08\uFF09\uFF1A\uFF1F\uFF01\u2014\u300A\u300B]");

    // Work in half-width units so that an unpaired half-width character is still charged
    // against the limit (a full-width character costs 2, anything else costs 1).
    long budget = 2L * len;
    long used = 0;
    int kept = 0;

    foreach (char c in originalNews)
    {
        int cost = regx.IsMatch(c.ToString()) ? 1 : 2;
        if (used + cost > budget)
        {
            break;
        }

        used += cost;
        kept++;
    }

    // Do not cut between the two halves of a surrogate pair.
    if (kept > 0 && kept < originalNews.Length
        && originalNews[kept - 1] >= '\uD800' && originalNews[kept - 1] <= '\uDBFF')
    {
        kept--;
    }

    return originalNews.Substring(0, kept);
}
/// <summary>
/// Truncates a headline to a maximum display width, counting a full-width character
/// (CJK ideograph or one of the listed full-width punctuation marks) as one unit and
/// every other character as half a unit.
/// </summary>
/// <param name="pOriginalNews">Value whose <c>ToString()</c> text is truncated; <c>null</c> yields an empty string.</param>
/// <param name="len">Maximum width in full-width units; zero or a negative value yields an empty string.</param>
/// <returns>The longest prefix of the text whose width does not exceed <paramref name="len"/>.</returns>
public string LimitLengthString( object pOriginalNews, int len)
{
    if (pOriginalNews == null)
    {
        return string.Empty;
    }

    string originalNews = pOriginalNews.ToString();
    if (originalNews == null || originalNews.Length == 0)
    {
        return string.Empty;
    }

    // Matches every character that is NOT full-width. The excluded (full-width) set is:
    // CJK ideographs U+4E00-U+9FA5, curly quotes U+201C/U+201D, full-width < > ( ) : ? !
    // (U+FF1C U+FF1E U+FF08 U+FF09 U+FF1A U+FF1F U+FF01), em dash U+2014 and the book-title
    // marks U+300A/U+300B. Escapes are used so the pattern survives re-encoding of the file;
    // the ASCII forms of the punctuation are deliberately not listed, so they count as half-width.
    Regex regx = new Regex(@"[^\u4E00-\u9FA5\u201C\u201D\uFF1C\uFF1E\uFF08\uFF09\uFF1A\uFF1F\uFF01\u2014\u300A\u300B]");

    // Work in half-width units so that an unpaired half-width character is still charged
    // against the limit (a full-width character costs 2, anything else costs 1).
    long budget = 2L * len;
    long used = 0;
    int kept = 0;

    foreach (char c in originalNews)
    {
        int cost = regx.IsMatch(c.ToString()) ? 1 : 2;
        if (used + cost > budget)
        {
            break;
        }

        used += cost;
        kept++;
    }

    // Do not cut between the two halves of a surrogate pair.
    if (kept > 0 && kept < originalNews.Length
        && originalNews[kept - 1] >= '\uD800' && originalNews[kept - 1] <= '\uDBFF')
    {
        kept--;
    }

    return originalNews.Substring(0, kept);
}
第二种是将字符串转为Unicode（UTF-16）的byte数组，然后判断每个字符的第二个byte（高位字节）是否为0：如果为0，则该字符落在ASCII（单字节）码范围内，按半角字符处理；否则按全角字符处理。
/// <summary>
/// Truncates a headline to a maximum display width, counting a character whose UTF-16
/// high byte is zero (U+0000-U+00FF) as half a unit and every other character as one unit.
/// Leading and trailing white space is trimmed before measuring.
/// </summary>
/// <param name="pOriginalNews">Value whose <c>ToString()</c> text is truncated; <c>null</c> yields an empty string.</param>
/// <param name="len">Maximum width in full-width units; zero or a negative value yields an empty string.</param>
/// <returns>The longest prefix of the trimmed text whose width does not exceed <paramref name="len"/>.</returns>
public string LimitLengthString( object pOriginalNews, int len)
{
    if (pOriginalNews == null)
    {
        return string.Empty;
    }

    string rawText = pOriginalNews.ToString();
    if (rawText == null)
    {
        return string.Empty;
    }

    string originalNews = rawText.Trim();

    // Little-endian UTF-16: two bytes per char, the second byte of each pair is the high byte.
    byte[] newsBytes = Encoding.Unicode.GetBytes(originalNews);

    // Work in half-width units so that an unpaired narrow character is still charged
    // against the limit (a wide character costs 2, a narrow one costs 1).
    long budget = 2L * len;
    long used = 0;
    int kept = 0;

    for (int i = 0; i < originalNews.Length; i++)
    {
        // A zero high byte means the character is in the single-byte range: narrow.
        int cost = newsBytes[2 * i + 1] == 0 ? 1 : 2;
        if (used + cost > budget)
        {
            break;
        }

        used += cost;
        kept++;
    }

    // Do not cut between the two halves of a surrogate pair.
    if (kept > 0 && kept < originalNews.Length
        && originalNews[kept - 1] >= '\uD800' && originalNews[kept - 1] <= '\uDBFF')
    {
        kept--;
    }

    // One Substring instead of decoding and appending each character separately.
    return originalNews.Substring(0, kept);
}