处理中英文字符长度截取

   public static class SomeString
    {
        /// <summary>Marker appended to the result when the text had to be cut.</summary>
        private const string Ellipsis = "…";

        // Splits text into homogeneous runs: ASCII letters, ASCII digits, CJK ideographs
        // (U+4E00..U+9FA5), or anything else. Every character belongs to exactly one run.
        // Built once and shared: Regex instances are safe to reuse across calls.
        private static readonly System.Text.RegularExpressions.Regex SegmentRegex =
            new System.Text.RegularExpressions.Regex("[a-zA-Z]+|[0-9]+|[\u4e00-\u9fa5]+|[^a-zA-Z0-9\u4e00-\u9fa5]+");

        // Detects whether a run consists of CJK ideographs (each counted as two units).
        private static readonly System.Text.RegularExpressions.Regex ChineseRegex =
            new System.Text.RegularExpressions.Regex("[\u4e00-\u9fa5]+");

        /// <summary>
        /// Truncates the string form of <paramref name="obj"/> to a display width in which
        /// a Chinese character counts as two units and every other character as one.
        /// When the text does not fit, the part that fits is returned followed by "…".
        /// </summary>
        /// <param name="obj">Value whose <c>ToString()</c> result is truncated; null yields an empty string.</param>
        /// <param name="count">
        /// Maximum width measured in English-character units (one Chinese character = two units).
        /// Zero (the default) or a negative value yields an empty string.
        /// </param>
        /// <returns>
        /// The original text when it fits within <paramref name="count"/> units; otherwise the
        /// longest fitting prefix followed by "…" (the marker itself is not counted).
        /// </returns>
        public static string ToSomeString(this object obj, int count = 0)
        {
            if (obj == null || count <= 0)
            {
                return string.Empty;
            }

            string str = obj.ToString();
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            int used = 0;

            // Matches() is evaluated once; the original re-ran it on every iteration.
            foreach (System.Text.RegularExpressions.Match match in SegmentRegex.Matches(str))
            {
                string value = match.Value;
                int unitWidth = ChineseRegex.IsMatch(value) ? 2 : 1;
                int remaining = count - used;
                int segmentWidth = value.Length * unitWidth;

                if (segmentWidth <= remaining)
                {
                    sb.Append(value);
                    used += segmentWidth;
                    continue;
                }

                // The run does not fit: keep as many whole characters as the remaining budget
                // allows (possibly none), mark the cut, and stop. Marking the cut even when the
                // budget is already exactly used up keeps the "…" consistent for every truncation.
                sb.Append(value, 0, remaining / unitWidth);
                sb.Append(Ellipsis);
                break;
            }

            return sb.ToString();
        }
    }

主要思想：用正则表达式匹配字符段，将不同类型的字符（英文字母、数字、汉字、其他字符）分割开，然后按显示长度（一个汉字计为两个英文字符长度）逐段处理，并添加到结果字符串中；如果长度大于限定长度，则截取，并追加"…"。

秒客网

处理中英文字符长度截取

相关文章