首页 > 代码库 > 代码复审

代码复审

  这次结对编程能抱到黎柱金同学的大腿,让我轻松了许多。结对伙伴的词频统计程序写得很好,下面我对大神的代码谈一些个人看法。

  伙伴的代码整体都很好,代码简洁,没有冗余代码,而且用C#语言实现面向对象,层次化抽象清晰,任务分配合理,没有过多冗余的类属性,代码重用也做得很好,进一步使得代码简洁明了。

 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Text;
 using System.Text.RegularExpressions;
 using System.Threading.Tasks;

 namespace WordFrequency
 {
     /// <summary>
     /// Command-line entry point of the word frequency counter.
     /// Usage: <c>[-e2|-e3] &lt;directory&gt;</c>.
     /// </summary>
     class Program
     {
         /// <summary>
         /// Parses the command line, scans the given directory and writes the
         /// tally to the output file.
         /// </summary>
         /// <param name="args">
         /// Either a single directory path (normal mode), or "-e2"/"-e3"
         /// followed by a directory path.
         /// </param>
         static void Main(string[] args)
         {
             TallyMode mode;
             string path;
             string outputFileName = "lizhujin@outlook.com.txt";

             #region tally mode & path
             switch (args.Length)
             {
                 case 0:
                     Console.WriteLine("Please specify a directory!");
                     return;
                 case 1:
                     mode = TallyMode.Normal;
                     path = args[0];
                     break;
                 case 2:
                     if (args[0] == "-e2")
                         mode = TallyMode.E2;
                     else if (args[0] == "-e3")
                         mode = TallyMode.E3;
                     else
                     {
                         Console.WriteLine("The argument must be -e2 or -e3. Scanning cancelled.");
                         return;
                     }
                     path = args[1];
                     break;
                 default:
                     Console.WriteLine("Arguments incorrect. Scanning cancelled.");
                     return;
             }
             #endregion

             /* the constructor throws IOException when the directory is missing */
             WordManager wordManager;
             try
             {
                 wordManager = new WordManager(path, mode);
             }
             catch (IOException e)
             {
                 Console.WriteLine(e.Message);
                 return;
             }

             Console.WriteLine("Path: {0}", path);
             Console.WriteLine("Start scanning...");
             wordManager.ScanAndCount(new List<string>() { ".txt", ".cpp", ".h", ".cs" }, false);
             Console.WriteLine("Complete!");

             Console.WriteLine(new string('-', SeparatorWidth()));

             /* 'using' guarantees the file is flushed and closed even if Output throws */
             using (var writer = new StreamWriter(outputFileName, false, Encoding.Default))
             {
                 wordManager.Output(true, writer);
             }
             Console.WriteLine("The results have been saved to \"{0}\"", outputFileName);
         }

         /// <summary>
         /// Returns the length of the separator line: one less than the console
         /// width, or a fixed fallback when no usable console width is available
         /// (e.g. the output is redirected).
         /// </summary>
         private static int SeparatorWidth()
         {
             const int fallbackWidth = 79;
             try
             {
                 int width = Console.WindowWidth - 1;
                 return width > 0 ? width : fallbackWidth;
             }
             catch (IOException)
             {
                 return fallbackWidth;
             }
         }
     }
 }

  伙伴将命令行输入可能出错进行判断,而且在控制台有人性化输出,让别人可以知道自己的输入问题所在,而且还进行异常处理,以防输入过程出现问题,使得程序更加稳定。在文件处理时也采用了异常处理,使得程序严谨正确性大大提高。

 

  using System;
  using System.Collections.Generic;
  using System.IO;
  using System.Linq;
  using System.Text;
  using System.Text.RegularExpressions;
  using System.Threading.Tasks;

  namespace WordFrequency
  {
      /// <summary>
      /// Scans a directory tree and tallies the words (or, in the extended
      /// modes, the two- or three-word phrases) found in the selected files.
      /// </summary>
      class WordManager
      {
          /* the list is sorted by words' lowercases, and it's not of the final order */
          /* in consideration of extended mode, it can also be used for a list of "word group"s */
          SortedList<string, WordInfo> WordList = new SortedList<string, WordInfo>();

          DirectoryInfo RootDirectory;
          TallyMode Mode;

          /// <summary>
          /// Creates a manager for the given root directory.
          /// </summary>
          /// <param name="path">directory to scan</param>
          /// <param name="mode">what to count: single words, or 2/3-word phrases</param>
          /// <exception cref="IOException">the directory does not exist</exception>
          public WordManager(string path, TallyMode mode)
          {
              if (!Directory.Exists(path))
                  throw new IOException("The directory specified doesn't exist!");

              RootDirectory = new DirectoryInfo(path);
              Mode = mode;
          }

          /// <summary>
          /// Counts the words of every file below the root directory whose
          /// extension matches one of <paramref name="extensions"/>
          /// (case-insensitively).
          /// </summary>
          /// <param name="extensions">
          /// each extension should contain the prefix dot, e.g., ".png"
          /// </param>
          /// <param name="showLogs">when true, prints each file name before scanning it</param>
          public void ScanAndCount(IList<string> extensions, bool showLogs)
          {
              /* scan files and sub-directories recursively */
              var resultFileInfos =
                  from fileInfo in RootDirectory.EnumerateFiles("*", SearchOption.AllDirectories)
                  where extensions.Any(ext => ext.Equals(fileInfo.Extension, StringComparison.OrdinalIgnoreCase))
                  select fileInfo;

              /* count words in each file */
              foreach (var fileInfo in resultFileInfos)
              {
                  if (showLogs)
                      Console.WriteLine("Scanning {0}", fileInfo.FullName);
                  CountWords(fileInfo);
              }
          }

          /// <summary>
          /// Writes the tally as "word: frequency" lines. Normal mode writes every
          /// entry; the extended modes write at most the first 10 entries.
          /// </summary>
          /// <param name="sort">
          /// when true, entries are ordered by <see cref="WordInfo.CompareTo"/>;
          /// otherwise they stay in the key order of the internal list
          /// </param>
          /// <param name="writer">destination of the report</param>
          public void Output(bool sort, TextWriter writer)
          {
              var wordInfoList = WordList.Values.ToList();

              if (sort)
                  wordInfoList.Sort();

              if (Mode == TallyMode.Normal)
                  foreach (var wordInfo in wordInfoList)
                      writer.WriteLine("{0}: {1}", wordInfo.Word, wordInfo.Frequency);
              else
                  for (int i = 0; i < wordInfoList.Count && i < 10; i++)
                      writer.WriteLine("{0}: {1}", wordInfoList[i].Word, wordInfoList[i].Frequency);
          }

          /* count words in specified file and store them to WordList */
          private void CountWords(FileInfo fileInfo)
          {
              string text;
              /* dispose the reader (and its stream) so the file handle is released promptly */
              using (FileStream readStream = fileInfo.OpenRead())
              using (StreamReader reader = new StreamReader(readStream, Encoding.Default))
              {
                  text = reader.ReadToEnd();
              }

              int i = 0;
              while (i < text.Length)
              {
                  string word;

                  if ((word = CurrentWord(text, i)) == null)
                  {
                      i++;
                      continue;
                  }

                  /* i now points just past the first word */
                  i += word.Length;

                  /* extended modes: the next word must follow after exactly one space */
                  string word1 = null;
                  if (Mode == TallyMode.E2 || Mode == TallyMode.E3)
                  {
                      if (i >= text.Length || text[i] != ' ' || (word1 = CurrentWord(text, i + 1)) == null)
                          continue;
                      word += " " + word1;
                  }

                  /* E3 only: a third word, again separated by exactly one space */
                  string word2 = null;
                  if (Mode == TallyMode.E3)
                  {
                      if (i + word1.Length + 1 >= text.Length || text[i + word1.Length + 1] != ' ' ||
                          (word2 = CurrentWord(text, i + word1.Length + 2)) == null)
                          continue;
                      word += " " + word2;
                  }

                  /* add this word to the list */
                  string lowerCase = word.ToLower();
                  if (WordList.ContainsKey(lowerCase))
                      WordList[lowerCase].Add(word);
                  else
                      WordList[lowerCase] = new WordInfo(word);

              }
          }

          /// <summary>
          /// if there is a legal word starting with s[i], returns it,
          /// otherwise returns null. A legal word starts at the beginning of
          /// the text or right after a delimiter, begins with at least three
          /// letters, and continues with letters or digits.
          /// </summary>
          /// <param name="s">text to examine</param>
          /// <param name="i">index of the candidate first character</param>
          /// <returns>the word, or null when no legal word starts at i</returns>
          private string CurrentWord(string s, int i)
          {
              if (i >= s.Length)
                  return null;

              /* not alphabetic */
              if (!IsAlphabetic(s[i]))
                  return null;

              /* alphabetic, but not following a delimiter */
              if (i > 0 && !IsDelimiter(s[i - 1]))
                  return null;

              /* legal start, but less than 3 alphabetic successively */
              if (i + 2 >= s.Length || !IsAlphabetic(s[i + 1]) || !IsAlphabetic(s[i + 2]))
                  return null;

              /* build word */
              StringBuilder wordBuilder = new StringBuilder(s.Substring(i, 3));
              i += 3;
              while (i < s.Length && IsAlphanumerical(s[i]))
              {
                  wordBuilder.Append(s[i]);
                  i++;
              }
              string word = wordBuilder.ToString();

              return word;
          }

          /* true for ASCII letters only */
          private bool IsAlphabetic(char c)
          {
              return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
          }

          /* true for ASCII letters and digits */
          private bool IsAlphanumerical(char c)
          {
              return IsAlphabetic(c) || c >= '0' && c <= '9';
          }

          /* every character that is not an ASCII letter or digit separates words */
          private bool IsDelimiter(char c)
          {
              return !IsAlphanumerical(c);
          }
      }
  }

  代码中的类、函数与变量的命名体现了显式表达原则,让人一看就可以了解其作用与目的,可读性增添不少。

 

 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;

 namespace WordFrequency
 {
     /// <summary>
     /// One tally entry: a representative spelling of a word and how many
     /// times the word has been seen.
     /// </summary>
     class WordInfo : IComparable<WordInfo>
     {
         /// <summary>The ordinally smallest spelling seen so far.</summary>
         public string Word { get; private set; }

         /// <summary>Number of occurrences recorded; starts at 1.</summary>
         public int Frequency { get; private set; }

         /// <summary>Creates an entry for the first occurrence of a word.</summary>
         /// <param name="word">the spelling of that first occurrence</param>
         public WordInfo(string word)
         {
             Word = word;
             Frequency = 1;
         }

         /// <summary>
         /// Records one more occurrence and keeps whichever spelling sorts
         /// first in ordinal order.
         /// </summary>
         /// <param name="newForm">the spelling of the new occurrence</param>
         public void Add(string newForm)
         {
             Frequency++;

             /* update the word */
             Word = string.CompareOrdinal(Word, newForm) < 0 ? Word : newForm;
         }

         /// <summary>
         /// Orders entries by descending frequency, then by ascending ordinal
         /// spelling.
         /// </summary>
         /// <param name="other">entry to compare with; may be null</param>
         /// <returns>
         /// a negative value when this entry sorts first, a positive value when
         /// <paramref name="other"/> sorts first, zero when they are equal
         /// </returns>
         public int CompareTo(WordInfo other)
         {
             /* IComparable contract: any instance is greater than null */
             if (other == null)
                 return 1;

             /* frequency */
             if (this.Frequency > other.Frequency)
                 return -1;
             else if (this.Frequency < other.Frequency)
                 return 1;
             /* lexical */
             else
                 return string.CompareOrdinal(this.Word, other.Word);
         }
     }
 }

 

  伙伴的代码经过他本人的优化已经体现了正确性与高效性,但是也有些细节可以进一步提高。

  首先,代码中已经添加了一些注释,但相对于代码量来说还不够,可以再适当增添一些,以提高代码可读性。

 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;

 namespace WordFrequency
 {
     /// <summary>
     /// Selects what the word counter tallies.
     /// </summary>
     enum TallyMode
     {
         /// <summary>Single words (no command-line switch).</summary>
         Normal = 0,

         /// <summary>Two-word phrases (the "-e2" switch).</summary>
         E2 = 1,

         /// <summary>Three-word phrases (the "-e3" switch).</summary>
         E3 = 2
     }
 }

  

  然后,他在模式二与模式三下对单词排序时,可以换一种算法来提高程序效率。他的代码在三种模式下都对所有单词进行完整排序,而模式二和模式三只需要输出频率最高的10个词组。因此,可以对全部词组做10次遍历,每次选出尚未输出的词组中频率最高的一个,此时的时间复杂度为O(10*N),即O(N),比现在完整排序的O(N*log(N))提高不少。

 

 

  最后,也是算法方面可以提高的问题,在WordManager.cs中,如下代码(从WordManager.cs的第104行开始):

 

/* add this word to the list */
/* the lower-cased spelling is the key, so different casings share one entry */
string lowerCase = word.ToLower();
/* ContainsKey and the indexer below each look the key up */
if (WordList.ContainsKey(lowerCase))
    WordList[lowerCase].Add(word);
else
    WordList[lowerCase] = new WordInfo(word);

  

  

  这里if判断中的ContainsKey和随后的WordList[lowerCase]对lowerCase执行了两次查找。如果改用TryGetValue,对于已经存在的单词只需查找一次,可以省去约一半的查找开销。像这样:

 /* add this word to the list */
 /* the lower-cased spelling is the key, so different casings share one entry */
 string lowerCase = word.ToLower();
 WordInfo value;
 /* branch on TryGetValue's result: one lookup for a word that is already known */
 if (WordList.TryGetValue(lowerCase, out value))
     value.Add(word);
 else
     WordList[lowerCase] = new WordInfo(word);

 

 

 

  以上就是我对黎柱金同学的个人项目代码复审报告。

 

代码复审