首页 > 代码库 > Trie(C#)

Trie(C#)

TrieSearch.cs

//#define WEB

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;


namespace TrieCS
{
    class MatchInfoComparer : IEqualityComparer<MatchInfo>
    {
        public bool Equals(MatchInfo x, MatchInfo y)
        {
            return x.Index.Equals(y.Index);
        }

        public int GetHashCode(MatchInfo obj)
        {
            return obj.Index;
        }
    }

    public class PerfInfo
    {
        public double FilterAddMs { get; internal set; }
        public double TotalMs { get; internal set; }
    }

    class SearchResult
    {
        public int Index { get; set; }
        public List<int> Positions { get; set; }
        public int Priority { get; set; }
        public string Content { get; set; }

        public SearchResult()
        {
            Positions = new List<int>();
        }
    }

    public class MatchInfo : IEquatable<MatchInfo>, IComparable<MatchInfo>
    {
        public int Index { get; set; }
        public int Position { get; set; }
        public int Priority { get; set; }
        public char Data { get; set; }


        public override string ToString()
        {
            return String.Format("[{0}.{1}]", Index, Position);
        }

        public bool Equals(MatchInfo other)
        {
            return Index == other.Index && Position == other.Position;
        }

        public int CompareTo(MatchInfo other)
        {
            return this.Index.CompareTo(other.Index);
        }
    }

    class Node
    {
        public char Data { get; set; }
        public List<MatchInfo> Infos { get; set; }
        public Hashtable Next { get; set; }
        public Node()
        {
            Next = new Hashtable(26);
            Infos = new List<MatchInfo>();
        }
        public override string ToString()
        {
            return Data.ToString();
        }
    }

    class TrieSearch
    {
        const string ALPHABET = "abcdefghijklmnopqrstyvwxyz";
        MatchInfoComparer _comparer = new MatchInfoComparer();
        HiPerfTimer _timer = new HiPerfTimer();

        public PerfInfo LastPerfInfo { get; private set; }

        public Node Root { get; private set; }

        Hashtable _nodes = new Hashtable();

        public Hashtable Nodes
        {
            get
            {
                return _nodes;
            }
        }

        private string[] _keywords;
        public string[] Keywords
        {
            get { return _keywords; }
            set
            {
                _keywords = value;
                Root = new Node();
                _nodes = new Hashtable();
                for (int i = 0; i < _keywords.Length; i++)
                {
                    BuildTree(Root, _keywords[i], i);
                }
            }
        }

        private void BuildTree(Node root, string keyword, int index)
        {
            Stack<Node> parents = new Stack<Node>();
            Node node = root;
            bool flag_start = true;
            for (int j = 0; j < keyword.Length; j++)
            {
                parents.Push(node);

                var c = keyword[j];

                var info = new MatchInfo
                {
                    Index = index,
                    Position = j,
                    Priority = j == 0 ? 2 : 1,
                    Data = c,
                };
                if (flag_start)
                {
                    flag_start = false;
                    info.Priority += 1;
                }
                if (Char.IsUpper(c))
                    info.Priority += 1;

                c = Char.ToLower(c);

                Node newNode = node.Next[c] as Node;
                if (newNode == null)
                {
#if WEB
                    if (_nodes[c] == null)
                        _nodes[c] = new Node();
                    newNode = _nodes[c] as Node;
#else
                    newNode = new Node();
#endif
                    newNode.Data = c;
                    node.Next[c] = newNode;
                }
#if WEB
                foreach (var p in parents)
                {
                    if (p == newNode) continue;
                    p.Next[c] = newNode;
                }
#endif
                newNode.Infos.Add(info);

                node = newNode;
            }
        }

        public List<MatchInfo> Search(string data)
        {
            LastPerfInfo = new PerfInfo();
            var infoList = new List<MatchInfo>();
            int pos = 0;
            bool first = true;
            Search(data, ref pos, Root, infoList, ref first);
            return infoList.ToList();
        }

        public SearchResult[] Convert(List<MatchInfo> matchInfos)
        {
            var results = new Dictionary<int, SearchResult>();
            var groups = matchInfos.GroupBy(_=>_.Data);
            foreach (var group in groups)
            {
                foreach (var sr in group)
                {
                    SearchResult result;
                    if (!results.TryGetValue(sr.Index, out result))
                    {
                        result = new SearchResult();
                        result.Content = Keywords[sr.Index];
                        results[sr.Index] = result;
                    }
                    if (!result.Positions.Contains(sr.Position))
                    {
                        result.Positions.Add(sr.Position);
                        if (result.Priority < sr.Priority)
                            result.Priority = sr.Priority;
                    }
                }
            }

            var c = new Comparison<int>((a, b) => { return a.CompareTo(b); });
            var continues = new Func<IEnumerable<int>, bool>(list =>
            {
                int last = list.ElementAt(0);
                foreach (var item in list.Skip(1))
                {
                    if (item != last + 1)
                        return false;
                    last = item;
                }
                return true;
            });
            foreach (var item in results.Values)
            {
                item.Positions.Sort(c);
                if (continues(item.Positions))
                    item.Priority += 2;
            }
            return results.Values.ToArray();
        }
#if WEB
        private void Search(string data, int pos, Node node, List<MatchInfo> infoList)
        {
            if (node == null) return;

            var lastNode = node;
            bool flag_first = true;

            for (int i = pos; i < data.Length; i++)
            {
                var c = data[i];
                node = node.Next[c] as Node;
                if (node == null)
                {
                    infoList.Clear();
                    break;
                }
                else
                {
                    if (flag_first)
                    {
                        flag_first = false;
                        infoList.AddRange(node.Infos);
                    }
                    else
                    {
                        _timer.Start();

                        var inter = infoList.Intersect(node.Infos, _comparer).ToLookup(_=>_.Index);
                        infoList.AddRange(node.Infos);
                        for (int ii = 0; ii < infoList.Count; ii++)
                        {
                            if (!inter.Contains(infoList[ii].Index))
                                infoList.RemoveAt(ii--);
                        }

                        _timer.Stop();
                        LastPerfInfo.FilterAddMs += _timer.Duration * 1000.0;
                    }
                }
            }
            return;
        }
#else
        private void Search(string data, ref int pos, Node node, List<MatchInfo> infoList, ref bool flag_first)
        {
            if (node == null) return;

            Node lastNode;

            for (; pos < data.Length; pos++)
            {
                var c = data[pos];
                lastNode = node;
                node = node.Next[c] as Node;
                if (node == null)
                {
                    foreach (var nc in ALPHABET)
                    {
                        if (nc == c) continue;
                        if (lastNode.Next[nc] == null) continue;
                        Search(data, ref pos, lastNode.Next[nc] as Node, infoList, ref flag_first);
                    }
                    break;
                }
                else
                {
                    if (flag_first)
                    {
                        flag_first = false;
                        infoList.AddRange(node.Infos);
                    }
                    else
                    {
                        _timer.Start();

                        var inter = infoList.Intersect(node.Infos, _comparer).ToLookup(_=>_.Index);
                        infoList.AddRange(node.Infos);
                        for (int ii = 0; ii < infoList.Count; ii++)
                        {
                            if (!inter.Contains(infoList[ii].Index))
                                infoList.RemoveAt(ii--);
                        }

                        _timer.Stop();
                        LastPerfInfo.FilterAddMs += _timer.Duration * 1000.0;
                    }
                }
            }
            return;
        }
#endif
    }
}

MainWindow.cs

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;

namespace TrieCS
{
    /// <summary>
    /// MainWindow.xaml 的交互逻辑
    /// </summary>
    public partial class MainWindow : Window
    {
        TrieSearch _trie = new TrieSearch();

        HiPerfTimer _timer = new HiPerfTimer();

        public MainWindow()
        {
            InitializeComponent();


            Initiate();
        }

        public void Initiate()
        {
        }

        private void content_TextChanged_1(object sender, TextChangedEventArgs e)
        {
            _trie.Keywords = content.Text.Replace("\r\n", " ").Replace("\r", " ").Replace("\n", " ").Split( );
            NodesListBox.ItemsSource = _trie.Nodes.OfType<DictionaryEntry>().Select(_=>_.Value).ToList();
            DisplayResults();
        }

        private void keyword_TextChanged_1(object sender, TextChangedEventArgs e)
        {
            DisplayResults();
        }

        private void DisplayResults()
        {
            _timer.Start();
            var results = _trie.Convert(_trie.Search(keyword.Text));
            Array.Sort(results, new Comparison<SearchResult>((a, b) =>
            {
                return b.Priority.CompareTo(a.Priority);
            }));
            _timer.Stop();
            time.Text = String.Format("Total:{0:0.00000}ms, FilterAdd:{1:0.00000}ms, Total words:{2}",
                _timer.Duration * 1000.0,
                _trie.LastPerfInfo.FilterAddMs,
                _trie.Keywords.Length);
            result.ItemsSource = results;
        }
    }
}

Mainwindow.xaml

<Window x:Class="TrieCS.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        Title="MainWindow" Height="350" Width="525">
    <Grid>
        <Grid.ColumnDefinitions>
            <ColumnDefinition Width="*"/>
            <ColumnDefinition Width="170"/>
        </Grid.ColumnDefinitions>
        <Grid.RowDefinitions>
            <RowDefinition Height="*"/>
            <RowDefinition Height="Auto"/>
            <RowDefinition Height="*"/>
            <RowDefinition Height="Auto"/>
        </Grid.RowDefinitions>
        <TextBox x:Name="content" Grid.Row="0" TextChanged="content_TextChanged_1" AcceptsReturn="True"/>
        <TextBox x:Name="keyword" Grid.Row="1" TextChanged="keyword_TextChanged_1" Height="24"/>
        <ListView x:Name="result" Grid.Row="2">
            <ListView.View>
                <GridView>
                    <GridViewColumn Width="50" DisplayMemberBinding="{Binding Priority}" Header="匹配度" />
                    <GridViewColumn Width="50" DisplayMemberBinding="{Binding Index}" Header="序号" />
                    <!--<GridViewColumn Width="50" DisplayMemberBinding="{Binding Position}" Header="位置" />-->
                    <GridViewColumn Width="250" DisplayMemberBinding="{Binding Content}" Header="内容" />
                </GridView>
            </ListView.View>
        </ListView>
        <TextBlock x:Name="time" Grid.Row="3" Height="24"/>
        
        <Grid Grid.Column="1" Grid.RowSpan="3">
            <Grid.RowDefinitions>
                <RowDefinition Height="*"/>
                <RowDefinition Height="*"/>
                <RowDefinition Height="*"/>
            </Grid.RowDefinitions>
            <ListBox
                x:Name="NodesListBox"
                DisplayMemberPath="Data"></ListBox>
            <ListBox
                x:Name="MatchInfoListBox"
                DataContext="{Binding Path=SelectedItem, ElementName=NodesListBox}"
                ItemsSource="{Binding Infos}"
                Grid.Row="1"/>
            <ListBox
                x:Name="NextNodesListBox"
                DataContext="{Binding Path=SelectedItem, ElementName=NodesListBox}"
                ItemsSource="{Binding Next}"
                DisplayMemberPath="Value"
                Grid.Row="2"/>
        </Grid>
    </Grid>
</Window>

HiPerfTimer

/* High precision timer class
 *   - http://www.eggheadcafe.com/articles/20021111.asp
 * 
 *  (Thanks to the author!)
 */
using System;
using System.Runtime.InteropServices;
using System.ComponentModel;
using System.Threading;

namespace TrieCS
{
    public class HiPerfTimer
    {
        [DllImport("Kernel32.dll")]
        private static extern bool QueryPerformanceCounter(out long lpPerformanceCount);
        [DllImport("Kernel32.dll")]
        private static extern bool QueryPerformanceFrequency(out long lpFrequency);
        private long startTime;
        private long stopTime;
        private long freq;
        /// <summary>
        /// ctor
        /// </summary>
        public HiPerfTimer()
        {
            startTime = 0;
            stopTime = 0;
            freq = 0;
            if (QueryPerformanceFrequency(out freq) == false)
            {
                throw new Win32Exception(); // timer not supported
            }
        }
        /// <summary>
        /// Start the timer
        /// </summary>
        /// <returns>long - tick count</returns>
        public long Start()
        {
            QueryPerformanceCounter(out startTime);
            return startTime;
        }
        /// <summary>
        /// Stop timer 
        /// </summary>
        /// <returns>long - tick count</returns>
        public long Stop()
        {
            QueryPerformanceCounter(out stopTime);
            return stopTime;
        }
        /// <summary>
        /// Return the duration of the timer (in seconds)
        /// </summary>
        /// <returns>double - duration</returns>
        public double Duration
        {
            get
            {
                return (double)(stopTime - startTime) / (double)freq;
            }
        }
        /// <summary>
        /// Frequency of timer (no counts in one second on this machine)
        /// </summary>
        ///<returns>long - Frequency</returns>
        public long Frequency
        {
            get
            {
                QueryPerformanceFrequency(out freq);
                return freq;
            }
        }
    }
}