首页 > 代码库 > C++学习之IO流

C++学习之IO流

本博文主要介绍IO流中fstream,iterator的简单应用。

问题描述:

a):找一篇文章(The Holy Bible),将其所有的单词输入vector中,并统计其单词个数

b):增加统计词频功能,用结构体实现(word ,count) ,将结构体存入vector中

c):增加停用词功能。

  1):从网上找 英文停用词,并命名为 stop_list.txt;

  2):统计词频时,若遇到此类词直接略过。

d):计算上述操作所花费的时间,具体有:

  1):读取文件时间;

  2):排序所用时间;

  3):打印所用时间。

代码如下:

  /*
   * Word-frequency counter.
   *
   * Reads the text file named on the command line, strips punctuation,
   * lower-cases every token, skips the stop words listed in
   * "stop_list.txt", counts how often each remaining word occurs, sorts
   * the words by descending count and prints them together with the time
   * (in microseconds) spent reading, sorting and printing.
   */
#include <iostream>
#include <string>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <algorithm>
#include <fstream>
#include <ctype.h>
#include <sys/time.h>
#include <stdexcept>
using namespace std ;

// One dictionary entry: a word and the number of times it was seen.
struct Word
{
    string word_ ;
    int count_ ;
};

// Read every word of `in` into `vec`, skipping the words listed in `stop`.
void ReadFile(ifstream &in, vector<Word> &vec, const vector<string> &stop );
// Read the whitespace-separated stop words of `in` into `stop`.
void ReadStopList( ifstream &in , vector<string> &stop);
// Tell whether `s` is one of the stop words.
bool IsStopWord(const vector<string> &stop , const string &s );
// Convert `s` to lower case in place.
void stringToLower(string &s);
// Remove every punctuation character from `s` in place.
void ErasePunct(string &s);
// Count one occurrence of `s` in `vec`, adding a new entry if needed.
void AddWordToDict(vector<Word> &vec , const string &s);
// Sort the entries by descending frequency.
void WordSort(vector<Word> &vec );
// Print every word together with its frequency.
void printFrequency( const vector<Word> &vec ) ;
// Current wall-clock time in microseconds.
int64_t getTime();

// Pipeline: read file -> store in vec -> sort -> print -> report timings.
// Returns 0 on success and EXIT_FAILURE on a usage or file error.
int main(int argc, const char *argv[])
{
    if(argc < 2)
    {
        // perror() would append an unrelated errno message; print the
        // usage text on its own instead.
        fprintf(stderr, "Usage:exe srcfile\n");
        exit(EXIT_FAILURE);
    }

    try
    {
        vector<Word> vec ;
        vector<string> stopList ;

        ifstream infile(argv[1]);
        ifstream stopfile("stop_list.txt");
        if( !stopfile )
            throw std::runtime_error("stop_list does not exist!");
        if( !infile )
            throw std::runtime_error("open file failure");

        int64_t starttime = getTime();
        ReadStopList(stopfile , stopList);
        ReadFile( infile, vec , stopList );
        int64_t readtime = getTime();

        WordSort( vec );
        int64_t sorttime = getTime();

        printFrequency( vec ) ;
        // Must be 64-bit: a microsecond timestamp does not fit in an int.
        int64_t printtime = getTime();

        cout << vec.size() << endl ;
        cout << "the time of reading file :" << (readtime - starttime) << endl ;
        cout << "the time of  sorting: "  << (sorttime - readtime) << endl ;
        cout<<"the time of printing"<< (printtime - sorttime) << endl ;
        infile.close();
    }
    catch(const std::exception &e)
    {
        // Report the failure instead of letting the exception escape main.
        fprintf(stderr, "%s\n", e.what());
        return EXIT_FAILURE;
    }
    return 0;
}

// Note: a text file saved on Windows can be converted to the Unix format
// with the dos2unix command before it is processed.
//
// Reads whitespace-separated tokens from `in`; each token has its
// punctuation removed and is lower-cased. Tokens that end up empty or
// that are stop words are skipped; the rest are counted in `vec`.
// `vec` is cleared first.
void ReadFile(ifstream &in,
              vector<Word> &vec,
              const vector<string> &stop )
{
    vec.clear();
    string s ;
    while( in >> s)
    {
        ErasePunct(s);
        stringToLower(s);
        // A token made only of punctuation is empty now and is not a word.
        if(s.empty())
            continue ;
        if(!IsStopWord(stop , s))
        {
            AddWordToDict(vec , s);
        }
    }
}

// Replaces the contents of `stop` with the whitespace-separated words
// read from `in`.
void ReadStopList( ifstream &in , vector<string> &stop)
{
    stop.clear();
    string s ;
    while( in >> s)
    {
        stop.push_back( s );
    }
}

// Lower-cases `s` in place.
void stringToLower(string &s)
{
    for(string::iterator it = s.begin(); it != s.end(); ++it)
    {
        // <ctype.h> functions require a value representable as unsigned
        // char; a plain (possibly negative) char is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(*it);
        if(isupper(ch))
            *it = static_cast<char>(tolower(ch));
    }
}

// Returns true when `s` is equal to one of the entries of `stop`
// (linear search).
bool IsStopWord(const vector<string> &stop , const string &s )
{
    return find(stop.begin(), stop.end(), s) != stop.end();
}

// Erases every punctuation character (as classified by ispunct) from `s`.
void ErasePunct(string &s)
{
    string::iterator it = s.begin();
    while(it != s.end())
    {
        if(ispunct(static_cast<unsigned char>(*it)))
            it = s.erase(it);   // erase() returns the next valid iterator
        else
            ++it ;
    }
}

// Increments the count of `s` if `vec` already holds it; otherwise appends
// a new entry with a count of 1. Uses a sequential search over `vec`.
void AddWordToDict(vector<Word> &vec , const string &s)
{
    for(vector<Word>::iterator it = vec.begin(); it != vec.end(); ++it)
    {
        if(it->word_ == s)
        {
            ++(it->count_);
            return ;
        }
    }

    // Not found: append a fresh entry.
    Word entry ;
    entry.word_ = s ;
    entry.count_ = 1 ;
    vec.push_back(entry);
}

// Ordering used by WordSort: non-zero when `w1` occurs more often than
// `w2`, which puts the most frequent words first.
int tmp(const Word &w1 , const Word &w2)
{
    return (w1.count_ > w2.count_);
}

// Sorts `vec` by descending frequency using the library sort.
void WordSort(vector<Word> &vec )
{
    sort(vec.begin() , vec.end() , tmp );
}

// Prints one "word / frequency" line per entry of `vec` to stdout.
void printFrequency( const vector<Word> &vec )
{
    // const_iterator is required because `vec` is a const reference.
    for(vector<Word>::const_iterator it = vec.begin();
        it != vec.end();
        ++it)
    {
        printf("word :%s, frequency: %d\n", it->word_.c_str(), it->count_);
    }
}

// Returns the current time of day in microseconds, as reported by
// gettimeofday(). Throws runtime_error when gettimeofday() fails.
int64_t getTime()
{
    struct timeval tm ;
    if(-1 == gettimeofday(&tm , NULL))
        throw runtime_error("gettime failure");

    // Widen before multiplying so the product is computed in 64 bits.
    int64_t t = static_cast<int64_t>(tm.tv_sec) * 1000 * 1000 ;
    t += tm.tv_usec ;
    return t ;
}

本程序时间复杂度为O(n*n),下篇文章我们将讨论时间复杂度更优的算法。

 

C++学习之IO流