首页 > 代码库 > Finder(文件内容搜索工具)

Finder(文件内容搜索工具)

搜索文件夹内容的小工具

Github

两种搜索模式的算法:

BoyerMooreSearch.cs

using System.Threading;using System.Collections.Generic;using System.Linq;namespace Finder.Algorithms{    /// <summary>    /// An implemention of Boyer-Moore algorithm.    /// <para/>author : Ornithopter    /// </summary>    class BoyerMooreSearch : SearchBase    {        /// <summary>        ///         /// </summary>        /// <param name="source"></param>        /// <param name="pattern"></param>        /// <param name="token"></param>        /// <returns>An array of matched index</returns>        public int[] Search(string source, string pattern, CancellationToken token)        {            var matchIndexes = new List<int>();            // step increasment.            int delta;            // prepare a map providing delta for each char in pattern string.            var deltaMap = CreateDeltaMap(pattern);            // start searching.            for (var i = pattern.Length - 1; i < source.Length; i += delta)            {                token.ThrowIfCancellationRequested();                // find next match and update delta.                if (FindNext(source, pattern, i, deltaMap, token, out delta))                {                    // add to result list if found.                    matchIndexes.Add(i - (pattern.Length - 1));                }            }            return matchIndexes.ToArray();        }        private static bool Match(string source, int[] deltaMap, string pattern, CancellationToken token)        {            // step increasment.            int delta;            // start searching.            for (var i = pattern.Length - 1; i < source.Length; i += delta)            {                token.ThrowIfCancellationRequested();                // find next match and update delta.                if (FindNext(source, pattern, i, deltaMap, token, out delta))                {                    return true;                }            }            return false;        }        /// <summary>        /// Find the next matched index and update delte at the same time.        /// </summary>        /// <param name="source"></param>        /// <param name="pattern"></param>        /// <param name="start"></param>        /// <param name="deltaMap"></param>        /// <param name="delta"></param>        /// <returns>true if found one, otherwise false.</returns>        private static bool FindNext(string source, string pattern, int start, int[] deltaMap, CancellationToken token, out int delta)        {            int i = pattern.Length - 1,                index = 0;            // start comparing from the last char in pattern.            while (source[start - index] == pattern[i - index])            {                token.ThrowIfCancellationRequested();                if (index != pattern.Length - 1)                {                    index++;                }                else                {                    // matchs to the end. So it‘s a search result.                    delta = pattern.Length;                    return true;                }            }            // found one dismatched char at (start - index), get delta from map.            var c = source[start - index];            delta = /*c > 128 ? 0 : */deltaMap[c];            if (delta == 0)            {                // this means the source[start] char is the last char in pattern                // and only appears once. So delta should be the length of pattern.                delta = pattern.Length;            }            return false;        }        static int[] CreateDeltaMap(string pattern)        {            const int alphabetSize = 0xffff;            var patternLength = pattern.Length;            var deltaMap = new int[alphabetSize];            // initialize the map.            for (var i = 0; i < alphabetSize; i++)            {                deltaMap[i] = patternLength;            }            // start from 0, which means any duplicated char will only have            // the index nearest to the end.            for (var i = 0; i < patternLength; i++)            {                var index = pattern[i];                deltaMap[index] = patternLength - i - 1;            }            return deltaMap;        }        protected override void Build(CancellationToken token)        {            //throw new NotImplementedException();        }        public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)        {            var fileList = FileList;            var deltaMap = CreateDeltaMap(keyword);            return fileList.Where(filePath => Match(ReadContent(filePath), deltaMap, keyword, token)).ToArray();        }    }}
View Code

Trie.cs

using System.Threading;using System;using System.Collections.Generic;using System.Linq;namespace Finder.Algorithms{    class Trie : SearchBase    {        class Node        {            public Dictionary<char, Node> Next { get; private set; }            public Dictionary<int, bool> FileIndex { get; private set; }            public Node()            {                FileIndex = new Dictionary<int, bool>();                Next = new Dictionary<char, Node>();            }        }        private static readonly HashSet<char> Ignorance = new HashSet<char>        {            [,            (,            {,        };        private static readonly HashSet<char> Spliters = new HashSet<char>        {             ,            ,,            \n,            \r,            ;,            ],            ),            },        };        private Node _root;        protected override void Build(CancellationToken token)        {            _root = new Node();            var index = 0;            foreach (var filePath in FileList)            {                token.ThrowIfCancellationRequested();                Build(index++, ReadContent(filePath));            }        }        private void Build(int fileIndex, string content)        {            var node = _root;            foreach (var t in content)            {                var c = t;                if (Ignorance.Contains(c))                {                    continue;                }                if (Spliters.Contains(c))                {                    node.FileIndex[fileIndex] = true;                    node = _root;                    continue;                }                /*if (!Char.IsLetter(c))                    continue;*/                if (Char.IsUpper(c))                    c = Char.ToLower(c);                Node next;                if (!node.Next.TryGetValue(c, out next))                {                    next = new Node();                    node.Next[c] = next;                }                node = next;                node.FileIndex[fileIndex] = false;            }            node.FileIndex[fileIndex] = true;        }        private string[] Search(string keyword, bool matchWholeWord, CancellationToken token)        {            var node = _root;            foreach (var t in keyword)            {                token.ThrowIfCancellationRequested();                var c = t;                if (Char.IsUpper(c))                    c = Char.ToLower(c);                Node next;                if (!node.Next.TryGetValue(c, out next))                {                    return new string[0];                }                node = next;            }            if (matchWholeWord)            {                return node.FileIndex.Where(_ => _.Value).Select(_ => FileList[_.Key]).ToArray();            }            return node.FileIndex.Select(_ => FileList[_.Key]).ToArray();        }        public const string ConfigMatchWholeWord = "ConfigMatchWholeWord";        public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)        {            var matchWholeWord = (bool)config[ConfigMatchWholeWord];            return Search(keyword, matchWholeWord, token);        }    }}
View Code