.NET实现高效过滤敏感查找树算法（分词算法）：

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

namespace YY.SmsPlatform.Common

{

    [Serializable]

    public class TrieNode

    {

        public bool m_end;

        public Dictionary<Char, TrieNode> m_values;

        public TrieNode()

        {

            m_values = new Dictionary<Char, TrieNode>();

        }

        /// <summary>

        /// 添加词库

        /// </summary>

        /// <param name="key"></param>

        public void AddKey(string key)

        {

            if (string.IsNullOrEmpty(key))

            {

                return;

            }

            TrieNode node = this;

            for (int i = ; i < key.Length; i++)

            {

                char c = key[i];

                TrieNode subnode;

                if (!node.m_values.TryGetValue(c, out subnode))

                {

                    subnode = new TrieNode();

                    node.m_values.Add(c, subnode);

                }

                node = subnode;

            }

            node.m_end = true;

        }

    }

    /// <summary>

    ///

    /// </summary>

    [Serializable]//注解部分可不加，和本算法没有关系

    public class TrieFilter

    {

        /// <summary>

        /// 检查是否包含非法字符

        /// </summary>

        /// <param name="text">输入文本</param>

        /// <returns>找到返回true.没有则返回false</returns>

        //public bool HasBadWord(string text)

        //{

        //    for (int i = 0; i < text.Length; i++)

        //    {

        //        TrieNode node;

        //        if (m_values.TryGetValue(text[i], out node))

        //        {

        //            for (int j = i + 1; j < text.Length; j++)

        //            {

        //                if (node.m_values.TryGetValue(text[j], out node))

        //                {

        //                    if (node.m_end)

        //                    {

        //                        return true;

        //                    }

        //                }

        //                else

        //                {

        //                    break;

        //                }

        //            }

        //        }

        //    }

        //    return false;

        //}

　　 　　/// <summary> 
　　 　　/// 检查是否包含非法字符 
　　 　　/// </summary> 
　　 　　/// <param name="text">输入文本</param>
 　　　　/// <returns>找到的第1个非法字符.没有则返回string.Empty</returns>

        public static bool HasBadWord(string text,TrieNode rootNode)

        {

            for (int i = ; i < text.Length; i++)

            {

                TrieNode node;

                if (rootNode.m_values.TryGetValue(text[i], out node))

                {

                    for (int j = i + ; j < text.Length; j++)

                    {

                        if (node.m_values.TryGetValue(text[j], out node))

                        {

                            if (node.m_end)

                            {

                                return true;

                            }

                        }

                        else

                        {

                            break;

                        }

                    }

                }

            }

            return false;

        }

        /// <summary>

        /// 检查是否包含非法字符

        /// </summary>

        /// <param name="text">输入文本</param>

        /// <returns>找到的第1个非法字符.没有则返回string.Empty</returns>

        public static string FindOne(string text,TrieNode rootNode)

        {

            for (int i = ; i < text.Length; i++)

            {

                char c = text[i];

                TrieNode node;

                if (rootNode.m_values.TryGetValue(c, out node))

                {

                    for (int j = i + ; j < text.Length; j++)

                    {

                        if (node.m_values.TryGetValue(text[j], out node))

                        {

                            if (node.m_end)

                            {

                                return text.Substring(i, j +  - i);

                            }

                        }

                        else

                        {

                            break;

                        }

                    }

                }

            }

            return string.Empty;

        }

        //查找所有非法字符

        public static IEnumerable<string> FindAll(string text,TrieNode rootNode)

        {

            for (int i = ; i < text.Length; i++)

            {

                TrieNode node;

                if (rootNode.m_values.TryGetValue(text[i], out node))

                {

                    for (int j = i + ; j < text.Length; j++)

                    {

                        if (node.m_values.TryGetValue(text[j], out node))

                        {

                            if (node.m_end)

                            {

                                yield return text.Substring(i, (j +  - i));

                            }

                        }

                        else

                        {

                            break;

                        }

                    }

                }

            }

        }

        /// <summary>

        /// 替换非法字符

        /// </summary>

        /// <param name="text"></param>

        /// <param name="c">用于代替非法字符</param>

        /// <returns>替换后的字符串</returns>

        public string Replace(string text,TrieNode rootNode)

        //public string Replace(string text, char c = '*')

        {

            char[] chars = null;

            string str = "";

            for (int i = ; i < text.Length; i++)

            {

                TrieNode subnode;

                if (rootNode.m_values.TryGetValue(text[i], out subnode))

                {

                    for (int j = i + ; j < text.Length; j++)

                    {

                        if (subnode.m_values.TryGetValue(text[j], out subnode))

                        {

                            if (subnode.m_end)

                            {

                                if (chars == null) chars = text.ToArray();

                                for (int t = i; t <= j; t++)

                                {

                                    str+= chars[t];

                                }

                                i = j;

                            }

                        }

                        else

                        {

                            break;

                        }

                    }

                }

            }

            return  chars == null ? text : str;

        }

    }

}
*注意事项：如果词库中有如：“我们”，“我们的”这样的重复词语，在查找内容中有“我们的”这样的语句则会重复出现“我们”，“我们的”（使用FindAll（）方法）
秒客网

.NET实现高效过滤敏感查找树算法（分词算法）：

相关文章