首页 > 代码库 > 代码复审
代码复审
在这次结对编程中,抱到黎柱金同学的大腿让我轻松许多。结对伙伴的词频统计程序写得很好,下面我对大神的代码给出一些个人的评价意见。
伙伴的代码整体都很好:代码简洁,没有冗余代码;用C#语言实现面向对象,层次化抽象清晰,任务分配合理,类中没有过多的冗余属性;而且代码重用也应用得很好,进一步使得代码简洁明了。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// Command-line entry point of the word-frequency counter.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the command line, scans the given directory and saves the
        /// tally to the output file.
        /// </summary>
        /// <param name="args">
        /// Either a single directory path (normal mode), or "-e2" / "-e3"
        /// followed by a directory path (extended modes).
        /// </param>
        static void Main(string[] args)
        {
            TallyMode mode;
            string path;
            string outputFileName = "lizhujin@outlook.com.txt";

            #region tally mode & path
            /* args is an array, so read Length rather than calling LINQ's Count() */
            switch (args.Length)
            {
                case 0:
                    Console.WriteLine("Please specify a directory!");
                    return;
                case 1:
                    mode = TallyMode.Normal;
                    path = args[0];
                    break;
                case 2:
                    if (args[0] == "-e2")
                        mode = TallyMode.E2;
                    else if (args[0] == "-e3")
                        mode = TallyMode.E3;
                    else
                    {
                        Console.WriteLine("The argument must be -e2 or -e3. Scanning cancelled.");
                        return;
                    }
                    path = args[1];
                    break;
                default:
                    Console.WriteLine("Arguments incorrect. Scanning cancelled.");
                    return;
            }
            #endregion

            WordManager wordManager;
            try
            {
                wordManager = new WordManager(path, mode);
            }
            catch (IOException e)
            {
                /* the constructor reports a missing directory with an IOException */
                Console.WriteLine(e.Message);
                return;
            }

            Console.WriteLine("Path: {0}", path);
            Console.WriteLine("Start scanning...");
            wordManager.ScanAndCount(new List<string>() { ".txt", ".cpp", ".h", ".cs" }, false);
            Console.WriteLine("Complete!");

            Console.WriteLine(new string('-', SeparatorWidth()));

            /* the using block closes the file even if Output throws */
            /* (pass Console.Out instead of writer to print the results to the console) */
            using (var writer = new StreamWriter(outputFileName, false, Encoding.Default))
            {
                wordManager.Output(true, writer);
            }
            Console.WriteLine("The results have been saved to \"{0}\"", outputFileName);
        }

        /// <summary>
        /// Returns the length of the separator line: one less than the console
        /// width, or a fixed fallback when the width is unavailable or not positive.
        /// </summary>
        private static int SeparatorWidth()
        {
            const int fallbackWidth = 79;
            try
            {
                int width = Console.WindowWidth - 1;
                return width > 0 ? width : fallbackWidth;
            }
            catch (IOException)
            {
                /* Console.WindowWidth can fail when there is no real console, e.g. redirected output */
                return fallbackWidth;
            }
        }
    }
}
伙伴对命令行输入可能出现的错误进行了判断,并且在控制台给出人性化的提示,让使用者知道自己的输入问题所在;同时还进行了异常处理,以防输入过程出现问题,使得程序更加稳定。在文件处理时也采用了异常处理,使得程序的严谨性与正确性大大提高。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// Scans a directory tree and tallies the words (or, in the extended
    /// modes, groups of two or three space-separated words) found in its files.
    /// </summary>
    class WordManager
    {
        /* number of entries written by Output in the extended modes */
        const int ExtendedModeTopCount = 10;

        /* the list is sorted by words' lowercases, and it's not of the final order */
        /* in consideration of extended mode, it can also be used for a list of "word group"s */
        SortedList<string, WordInfo> WordList = new SortedList<string, WordInfo>();

        DirectoryInfo RootDirectory;
        TallyMode Mode;

        /// <summary>
        /// Creates a manager for the given root directory and tally mode.
        /// </summary>
        /// <param name="path">directory to scan</param>
        /// <param name="mode">what to tally: single words, or groups of 2 / 3 words</param>
        /// <exception cref="DirectoryNotFoundException">
        /// the directory does not exist (a kind of <see cref="IOException"/>,
        /// so callers catching IOException still handle it)
        /// </exception>
        public WordManager(string path, TallyMode mode)
        {
            if (!Directory.Exists(path))
                throw new DirectoryNotFoundException("The directory specified doesn't exist!");

            RootDirectory = new DirectoryInfo(path);
            Mode = mode;
        }

        /// <summary>
        /// Scans the root directory recursively and counts the words of every
        /// file whose extension is listed in <paramref name="extensions"/>.
        /// </summary>
        /// <param name="extensions">
        /// each extension should contain the prefix dot, e.g., ".png";
        /// matching ignores case
        /// </param>
        /// <param name="showLogs">when true, prints the full name of each file before counting it</param>
        /// <exception cref="ArgumentNullException"><paramref name="extensions"/> is null</exception>
        public void ScanAndCount(IList<string> extensions, bool showLogs)
        {
            if (extensions == null)
                throw new ArgumentNullException("extensions");

            /* scan files and sub-directories recursively */
            var resultFileInfos =
                from fileInfo in RootDirectory.EnumerateFiles("*", SearchOption.AllDirectories)
                where extensions.Any(ext => string.Equals(ext, fileInfo.Extension, StringComparison.OrdinalIgnoreCase))
                select fileInfo;

            /* count words in each file */
            foreach (var fileInfo in resultFileInfos)
            {
                if (showLogs)
                    Console.WriteLine("Scanning {0}", fileInfo.FullName);
                CountWords(fileInfo);
            }
        }

        /// <summary>
        /// Writes "word: frequency" lines to <paramref name="writer"/>: every
        /// entry in normal mode, at most the first 10 in the extended modes.
        /// </summary>
        /// <param name="sort">when true, entries are ordered with WordInfo's own comparison first</param>
        /// <param name="writer">destination of the report; not closed by this method</param>
        /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null</exception>
        public void Output(bool sort, TextWriter writer)
        {
            if (writer == null)
                throw new ArgumentNullException("writer");

            var wordInfoList = WordList.Values.ToList();

            if (sort)
                wordInfoList.Sort();

            int limit = Mode == TallyMode.Normal
                ? wordInfoList.Count
                : Math.Min(wordInfoList.Count, ExtendedModeTopCount);

            for (int i = 0; i < limit; i++)
                writer.WriteLine("{0}: {1}", wordInfoList[i].Word, wordInfoList[i].Frequency);
        }

        /* count words in specified file and store them to WordList */
        private void CountWords(FileInfo fileInfo)
        {
            string text;

            /* dispose the stream and the reader as soon as the text has been read */
            using (FileStream readStream = fileInfo.OpenRead())
            using (StreamReader reader = new StreamReader(readStream, Encoding.Default))
            {
                text = reader.ReadToEnd();
            }

            int i = 0;
            while (i < text.Length)
            {
                string word;

                if ((word = CurrentWord(text, i)) == null)
                {
                    i++;
                    continue;
                }

                /* i now points just past the first word; the following words of a
                   group are not skipped, so each of them can start a group of its own */
                i += word.Length;

                string word1 = null;
                if (Mode == TallyMode.E2 || Mode == TallyMode.E3)
                {
                    /* a group needs exactly one space and then another legal word */
                    if (i >= text.Length || text[i] != ' ' || (word1 = CurrentWord(text, i + 1)) == null)
                        continue;
                    word += ' ' + word1;
                }

                string word2 = null;
                if (Mode == TallyMode.E3)
                {
                    /* index of the character right after the second word */
                    int afterSecond = i + word1.Length + 1;
                    if (afterSecond >= text.Length || text[afterSecond] != ' ' ||
                        (word2 = CurrentWord(text, afterSecond + 1)) == null)
                        continue;
                    word += ' ' + word2;
                }

                /* add this word to the list */
                /* NOTE(review): ContainsKey followed by the indexer looks the key up twice;
                   ToLower() is also culture-sensitive. Kept as written. */
                string lowerCase = word.ToLower();
                if (WordList.ContainsKey(lowerCase))
                    WordList[lowerCase].Add(word);
                else
                    WordList[lowerCase] = new WordInfo(word);

            }
        }

        /// <summary>
        /// if there is a legal string starting with s[i], returns it,
        /// otherwise returns null
        /// </summary>
        /// <remarks>
        /// A legal string starts at the beginning of the text or right after a
        /// delimiter, begins with at least three letters, and continues over
        /// any following letters and digits.
        /// </remarks>
        /// <param name="s">text being scanned</param>
        /// <param name="i">index of the candidate first character</param>
        /// <returns>the word starting at s[i], or null</returns>
        private string CurrentWord(string s, int i)
        {
            if (i >= s.Length)
                return null;

            /* not alphabetic */
            if (!IsAlphabetic(s[i]))
                return null;

            /* alphabetic, but not following a delimiter */
            if (i > 0 && !IsDelimiter(s[i - 1]))
                return null;

            /* legal start, but less than 3 alphabetic successively */
            if (i + 2 >= s.Length || !IsAlphabetic(s[i + 1]) || !IsAlphabetic(s[i + 2]))
                return null;

            /* build word */
            StringBuilder wordBuilder = new StringBuilder(s.Substring(i, 3));
            i += 3;
            while (i < s.Length && IsAlphanumerical(s[i]))
            {
                wordBuilder.Append(s[i]);
                i++;
            }

            return wordBuilder.ToString();
        }

        /* true for the ASCII letters a-z and A-Z */
        private static bool IsAlphabetic(char c)
        {
            return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
        }

        /* true for ASCII letters and the digits 0-9 */
        private static bool IsAlphanumerical(char c)
        {
            return IsAlphabetic(c) || c >= '0' && c <= '9';
        }

        /* every character that is not an ASCII letter or digit separates words */
        private static bool IsDelimiter(char c)
        {
            return !IsAlphanumerical(c);
        }
    }
}
代码中的类、函数与变量的命名体现了显式表达原则,让人一看就可以了解其作用与目的,可读性增添不少。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// One tallied word (or word group): the form that is reported and the
    /// number of times it has been seen.
    /// </summary>
    class WordInfo : IComparable<WordInfo>
    {
        /// <summary>The form to report: the ordinal-smallest of all forms seen so far.</summary>
        public string Word { get; private set; }

        /// <summary>Number of occurrences recorded so far (starts at 1).</summary>
        public int Frequency { get; private set; }

        /// <summary>Creates an entry for a word that has been seen once.</summary>
        /// <param name="word">the form in which the word was first seen</param>
        public WordInfo(string word)
        {
            Word = word;
            Frequency = 1;
        }

        /// <summary>
        /// Records one more occurrence, possibly in a different form.
        /// </summary>
        /// <param name="newForm">the form of this occurrence</param>
        public void Add(string newForm)
        {
            Frequency++;

            /* update the word: keep whichever form comes first in ordinal order */
            Word = string.CompareOrdinal(Word, newForm) < 0 ? Word : newForm;
        }

        /// <summary>
        /// Orders entries by descending frequency, then by ordinal order of the word.
        /// </summary>
        /// <param name="other">entry to compare with; may be null</param>
        /// <returns>
        /// negative when this entry sorts first, positive when it sorts last,
        /// zero when both are equal; a null entry always sorts before this one
        /// </returns>
        public int CompareTo(WordInfo other)
        {
            /* IComparable<T> convention: any instance is greater than null */
            if (other == null)
                return 1;

            /* frequency: the higher one sorts first, hence the reversed operands */
            int byFrequency = other.Frequency.CompareTo(Frequency);
            if (byFrequency != 0)
                return byFrequency;

            /* lexical */
            return string.CompareOrdinal(Word, other.Word);
        }
    }
}
伙伴的代码经过他本人的优化已经体现了正确性与高效性,但是也有些细节可以进一步提高。
首先,代码中已经添加了一些注释,但相对于代码量来说还不够,可以再适当增添一些,以提高代码的可读性。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace WordFrequency
{
    /// <summary>
    /// What the scanner tallies.
    /// </summary>
    enum TallyMode
    {
        /// <summary>Single words (no mode argument on the command line).</summary>
        Normal,

        /// <summary>Groups of two words (the "-e2" argument).</summary>
        E2,

        /// <summary>Groups of three words (the "-e3" argument).</summary>
        E3
    }
}
然后,在模式二与模式三下对单词排序时,可以换一种算法来提高程序效率。他的代码在三种模式下都是对所有单词进行全排序,而模式二和模式三只需要输出频率最高的前10项。因此,可以每输出一项就遍历一次全部单词,选出尚未输出的频率最高者;这样一共遍历10次,时间复杂度为O(10*N),即O(N),比现在全排序的O(N*log(N))提高不少。
最后,也是算法方面可以提高的问题。在WordManager.cs中有如下代码(从WordManager.cs的第104行开始):
/* add this word to the list */
/* lowerCase is looked up twice below: once by ContainsKey, once by the indexer */
string lowerCase = word.ToLower();
if (WordList.ContainsKey(lowerCase))
    WordList[lowerCase].Add(word);
else
    WordList[lowerCase] = new WordInfo(word);
这里if判断中的ContainsKey和随后的WordList[lowerCase]对lowerCase各执行了一次查找,一共两次。如果改成一次查找,单词已存在时查找部分的开销大约可以减半。像这样:
/* add this word to the list */
string lowerCase = word.ToLower();
WordInfo value;
/* TryGetValue returns whether the key exists and fetches the entry in the same lookup */
if (WordList.TryGetValue(lowerCase, out value))
    value.Add(word);
else
    WordList[lowerCase] = new WordInfo(word);
以上就是我对黎柱金同学的个人项目代码复审报告。
代码复审