首页 > 代码库 > 词频统计

词频统计

作业要求:

1.读取文件;

2.记录出现的词汇及出现频率;

3.输出运行结果。

编码实现:

// pin.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <new>
#include <string>
#define SOURCE_H

// One recorded occurrence of a searched word in the text.
struct node
{
    int col;     // line of the hit (despite the name): starts at 1, +1 per '\n'
    int row;     // position within the line: starts at 0, +1 per run of spaces
    node* next;  // next occurrence of the same word, or NULL
};

// One searched word together with the list of places where it was found.
// Note: differs from `node` only by the capital letter.
struct Node
{
    char words[20];  // the word as a NUL-terminated string
    node* ptr;       // head of the occurrence list, NULL while nothing was found
    Node* next;      // next searched word, or NULL
    int num;         // length of `words`, not counting the terminator

};

// Singly linked list of searched words. The list owns every Node it links
// and every occurrence record hanging off those Nodes.
class TLink
{
public:
    TLink() : head( NULL ) {}

    // Releases the whole structure and leaves `head` at NULL.
    ~TLink()
    {
        while( head != NULL )
        {
            Node* doomed = head;
            head = head -> next;

            // The occurrence records belong to the word being removed;
            // deleting only the Node would leak them.
            node* hit = doomed -> ptr;
            while( hit != NULL )
            {
                node* following = hit -> next;
                delete hit;
                hit = following;
            }

            delete doomed;
        }
    }

    // Appends Item to the list unless the same word is already stored.
    void Insert( char* Item );
    // Scans `size` characters of szFile and records where the stored words occur.
    void calcute(char *szFile,int size);
    // Returns the first stored word, or NULL for an empty list.
    Node* gethead();

private:
    // Declared but not defined: a member-wise copy would make two lists
    // delete the same nodes.
    TLink( const TLink& );
    TLink& operator=( const TLink& );

    Node* head;
};

// Maps an upper-case letter 'A'..'Z' to lower case; any other character is returned unchanged.
char A_to_a( char alp );

// Prints every searched word followed by its recorded positions and its occurrence count.
void showwindow();

// Loads the input text file into the global buffer and echoes it to standard output.
void show_text();

// Interactive loop: reads the words to look for, runs the count and prints the report.
void input();
#include<iostream>
#include<fstream>
#include<cstdlib>
//#include "source.h"
using namespace std;

// The single word list of the program; filled by input() and printed by showwindow().
TLink link;
// Number of characters show_text() stored in szFile; input() passes it to calcute() as the size.
int i=0;
// Contents of the input file; show_text() writes a terminating NUL after the last character.
char szFile[2000];

// Program entry point: load and echo the text, print a blank line,
// then hand control to the interactive query loop.
int main()
{
    show_text();
    std::cout << std::endl;

    input();
    return 0;
}


/****************************************************************/

void TLink::Insert(char *Item)
{
int flag = 0;
Node* temp;
temp = new Node;
int i = 0;
while( Item[i] != ‘\0‘ )
{
temp -> words[i] = Item[i];
++ i;
}
temp -> num = i;
temp -> words[i] = ‘\0‘;


Node* ptrr = NULL;
ptrr = link.gethead();
while( ptrr != NULL )
{
if( ptrr -> num == temp -> num )
{
int n;
for( n = 0; n < i; ++ n )
if( A_to_a( ptrr -> words[n] ) != A_to_a( Item[n] ) )
break;
if( n == i )
{
flag = 1;
break;
}
}
ptrr = ptrr -> next;
}

 


if( flag != 1 )
{

temp -> ptr = NULL;
temp -> next = NULL;
Node* Temp = head;
if( head == NULL )
{
head = temp;
}
else
{
while( Temp -> next != NULL )
Temp = Temp -> next;
Temp -> next = temp;
}
}
else
delete temp;

}

/*****************************************************************/

// Returns the lower-case form of an upper-case letter 'A'..'Z';
// every other character is returned unchanged.
char A_to_a( char alp )
{
    if( alp >= 'A' && alp <= 'Z' )
        return static_cast<char>( alp - 'A' + 'a' );
    return alp;
}

/*****************************************************************/

void TLink::calcute(char *szFile, int size)
{
//cout << "calcute is called!" << endl;
int i = 0; //记录已搜索过的字符数-1
int col = 1;//列标
int row = 0;//行标
int count;//记录空格数-1
Node* ptrr = NULL;

while( i < size )
{
ptrr = link.gethead();
int j = 0;//对每个单词从开始计数
while( ( szFile[i] >= ‘a‘ && szFile[i] <= ‘z‘ ) || ( szFile[i] >= ‘A‘ && szFile[i] <= ‘Z‘ ) )
{
++ i;
++ j;
}
while( ptrr != NULL )
{
if( ptrr -> num == j )
{
int n;
for( n = 0; n <= j; ++ n )
if( A_to_a( ptrr -> words[n] ) != A_to_a( szFile[i - j + n] ) )
break;
if( n == j )
{
node* temp;
temp = new node;
temp -> col = col;
temp -> row = row;
temp -> next = NULL;
node* Temp = ptrr -> ptr;
if( ptrr -> ptr == NULL )
{
ptrr -> ptr = temp;
}
else
{
while( Temp -> next != NULL )
Temp = Temp -> next;
Temp -> next = temp;
}
}//插入行数
}

ptrr = ptrr -> next;
}
if( szFile[i] == ‘ ‘ || szFile[i] == ‘\n‘ )
{
count = -1;
while( szFile[i] == ‘ ‘ )
{

++ i; //设置列数
++ row;//行的单词个数加
++ count;//单词之间空格-1
}
row = row - count;
if( szFile[i] == ‘\n‘ )
{
++ col; //列遇到换行累加
++ i;
row = 0;//单词的行个数清零
}
}
else
++ i;
}
cout << endl;

}

/****************************************************************/


// Accessor for the first stored word; yields NULL while the list is empty.
Node* TLink::gethead()
{
    return this -> head;
}

/********************************************************/

void showwindow()
{

Node* curptr = link.gethead();


while( curptr != NULL )
{

int word_num = 0;
for( int k = 0; curptr -> words[k] != ‘\0‘; ++ k )
cout << curptr -> words[k];

cout << endl;
if( curptr -> ptr == NULL )
cout << "没有该词,或输入不正确!" << endl;
else
while( curptr -> ptr != NULL )
{
cout << "(";
cout << curptr -> ptr -> col ;
cout << ",";
cout << curptr -> ptr -> row ;
cout << ")";
cout << ‘ ‘;

curptr -> ptr = curptr -> ptr -> next;
word_num ++;
}
cout << endl;
cout << "该单词共出现" << word_num << "次!" << endl;

curptr = curptr -> next;

}


}


/*************************************************************/
// Reads the input file into the global buffer szFile, stores the number of
// characters read in the global i, and echoes the text followed by the
// character count to standard output.
//
// Terminates the program with exit status 1 when the file cannot be opened.
// At most sizeof(szFile) - 1 characters are kept; if the file is longer a
// warning is written to std::cerr and the rest is ignored.
void show_text()
{
    std::ifstream fin( "F:\\pin\\Debug\\1.txt" );
    if( !fin )
    {
        std::cout<<"Input file opening failed.\n";
        std::exit(1);
    }

    // Leave room for the terminating NUL.
    const int capacity = static_cast<int>( sizeof( szFile ) ) - 1;

    char next;
    i = 0;
    // Testing the stream itself ends the loop on end-of-file and on any
    // read error.
    while( i < capacity && fin.get( next ) )
    {
        szFile[i] = next;
        ++ i;
    }
    szFile[i] = '\0';

    if( i == capacity && fin.get( next ) )
        std::cerr << "Warning: input file is longer than " << capacity
                  << " characters; the rest is ignored." << std::endl;

    std::cout.write( szFile, i );
    std::cout << "*****Total number :" << i << std::endl;
    std::cout << "***************************************************************************" << std::endl;
}


/**********************************************************************/
void input()
{
char Item[40]; //暂存数组
char in; //接受输入字符
char ans; //判断是否重新开始

do{
if( link.gethead() != NULL )
link.~TLink();
cout << "请输入要统计的单词,单词之间用逗号隔开(输入@键结束,本程序忽略空格):" << endl;
cin >> in;
int flag = 1;
while( true )
{
if( in == ‘@‘ )
break;
int m = 0;
while( in != ‘,‘ )
{
Item[m] = in;
++ m;
cin >> in;
if( in == ‘@‘ )
{
flag = 0;
break;
}
}

Item[m] = ‘\0‘;

link.Insert( Item );
if( flag == 0 )
break;
cin >> in;
}

if( link.gethead() == NULL )
cout << "没有插入任何单词!" << endl;
else
{
link.calcute( szFile, i );
showwindow();
}
cout << "是否继续?(Y/y or N/n):";
cin >> ans;
}while( ( ans != ‘n‘ ) && ( ans != ‘N‘ ) );
}

运行结果:

(此处原为程序运行结果的截图,图片未能随文本保留。)

 

词频统计