首页 > 代码库 > c++下lrc歌词文件检索(自己写的检索歌词文件,记录点滴)

c++下lrc歌词文件检索(自己写的检索歌词文件,记录点滴)

贴上一个文档,是写这个程序的基本的思路,当然具体的程序和这个有一些出入,但是大体上一样。求批评指正。

/*目标:在一个文件夹下把所有的lrc歌词文件用程序导入,进行分析,最后的结果用TXT表示出来。*/

/*分析:只要建立了倒排索引就可以很容易把歌词的索引文件输入到il.txt中,所以应当有一个函数实现输入倒排索引,而后建立文件输出txt:

0.bool Lyricsindex_out(Lyric_index_list index_list[],int m ){}

//相关的有:

struct Word_item{//词项头存储

String word;

int freq=0;

 Word_Doc *head_docID;

};Word_item lyrics_head=new Word_item[];

 

struct Word_Doc{//词项位置存储

int text_number;

Word_Doc *next;

}*head_docID;

 

Word_Doc *temp;

Fstream  fout(“index_lyrics.txt”);

Fout<<”doc”<<setw(12)<<”freq”<<setw(12)<<”->”<<setw(12)<<”list”<<endl;

 

for(int i=0,int j=0;index_list[i]->next!=NULL;i++){

Fout<<index_list[i]->word<<setw(12)<<index_list[i]->freq<<setw(12)<<”->”<<setw(12);

temp=index_list[i]->head_docID;

For( j=0;temp!=NULL;j++){

Fout<<temp->text_number<<”,”;

temp=temp->next;

}

}

输出txt文件的写法:

1.应当有一个类,Class Lyric_Index_Analysis

处理从文件夹中的输入的lrc歌词文件,并且过滤掉”[...]”,最后只留下歌词的主体,然后把歌词的主体进行分析,以空格,回车,标点符号为界限,将每个词解析出来,

 

存入

lyrics_head[],

例如:

lyrics_head[0].word=xiejiang;

lyrics_head[0].freq=1;      

lyrics_head[0].head_docID=new Word_Doc;

lyrics_head[0].head_docID->text_number=0;

lyrics_head[0].head_docID->next=NULL;

每处理一个文档时将最终生成的词项头的数组的最大值记住,这可以设置成一个函数,

Int get_lyrics_head(Word_item lyrics_head[],string  filename)

返回的时候返回数组和最大值

2.然后这个类处理完所有的文档之后,应当有一个

lyrics_head[]的数组,大小为n,

然后建立一个函数将所有的词项按照词项的字母序归并排序

之后仍然返回数组

Void Lyric_mergesort(Word_item lyrics_head[],int left,int right)

3.到最后一步,Index_list  Analy_Setup_index (Word_item lyrics_head[],int n)

建立链表

输入lyrics_head[]和n

对数组进行遍历,如果相邻两个元素的word相同,就将它们合并

 

用一个指针指向lyrics_head[]和Word_item,直到这个数组相同的部分已经不存在了

然后将两个放在一个链表里。

因为归并排序是稳定的,所以可以直接将两个lyrics_head[].word连起来。如下方法:例如

 

struct Index_list{//用于生成最终的索引表

String word;

int freq=0;

Word_Doc *head_docID;

Index_list* next;

};

 

Word_Doc* lyrics_doc;                //=new Word_Doc[];

Index_list Lyric_index_list=new Index_list[];

 

Lyric_index_list[0].word=lyrics_head[0].word;

Lyric_index_list[0].head_docID=lyrics_head[0].head_docID;

Lyric_index_list[0].freq=lyrics_head[0].freq;

Lyric_index_list[0]->next=NULL;

 

Lyrics_doc=lyrics_head[0].head_docID;

 

For(int i=0,int j=0;i<n;i++){

If(Lyric_index_list[j].word==lyrics_head[i].word){

 

Lyric_index_list[j].freq++;

Lyrics_doc->next=lyrics_head[i].head_docID;

Lyrics_doc=Lyrics_doc->next;

}

Else{

J++;

Lyric_index_list[j-1].next=Lyric_index_list[j];

Lyric_index_list[j]->next=NULL;

Lyric_index_list[j].word=lyrics_head[i].word;

Lyric_index_list[j].head_docID=lyrics_head[i].head_docID;

Lyric_index_list[j].freq=lyrics_head[i].freq;

}

}

 Return Lyric_index_list;

*/

好了废话少说贴代码

首先main所在的.cpp:

#include<iostream>#include<fstream>#include<string>#include<iomanip>#include <stdio.h>#include<io.h>#include"Analysis_Lyrics.h"using namespace std;int main(){	Lyric_Index_Analysis Lyric_A;//建立歌词分析的类	string str = "\0";	int bound = 0, upper = 0, i = 1;//有n个数,则有上标为n,这是词项表的上下界	Word_item *temp = new Word_item[Max_size - 500];//用于归并交换的需要	struct _finddata_t fileinfo;//文件处理,寻找默认目录下的歌词文件	long pFile;//调用成功返回0,否则返回-1//	cout << "第 " << pFile << " 个文档是:" << fileinfo.name << "  编号为:1" << endl;	if ((pFile = _findfirst("*.lrc", &fileinfo)) == -1) {		cout << "不存在.lrc文件" << endl;		return 0;	}	else {		cout << "第 " << i++ << " 个文档是:" << fileinfo.name << "  编号为:1" << endl;		Lyric_A.Lyrics_input(fileinfo.name, upper, bound, 1);//输入文件调用		cout << "下界为->" << bound << "上界为->" << upper << endl;		while (_findnext(pFile, &fileinfo) == 0) {			cout << "第 " << i << " 个文档是:" << fileinfo.name << "  编号为:" << i << endl;			bound = upper;			Lyric_A.Lyrics_input(fileinfo.name, upper, bound, i);			cout << "下界为->" << bound << "上界为->" << upper << endl;			i++;		}	}	_findclose(pFile);	bound = 0;	cout << "总的词项表的下界为->" << bound << "上界为->" << upper << endl;	Lyric_A.Lyric_mergesort(Lyric_A.Return_lyrics_head(),temp, bound, upper - 1);	Lyric_A.print(upper);	Lyric_A.Lyricsindex_out(upper);}//如下的小代码是遍历一个文件夹下的文档的程序/*#include<iostream>#include <io.h>  using namespace std;int main(){	struct _finddata_t fileinfo;	long hFile;	if ((hFile = _findfirst("*.lrc", &fileinfo)) == -1)		return -1;	else {		cout << fileinfo.name << endl;		while (_findnext(hFile, &fileinfo) == 0){						cout << fileinfo.name << endl;		}	}	_findclose(hFile);	return 0;}*/

接着是处理歌词文件的类.h:

#ifndef ANALYSIS_LYRICS_H
#define ANALYSIS_LYRICS_H

#include <fstream>
#include <iostream>
#include <string>

// Kept for backward compatibility: the files that include this header use
// unqualified standard-library names.
using namespace std;

// Number of slots in the term table allocated by Lyric_Index_Analysis.
static const int Max_size = 4000;
// Line-buffer length constant; kept for code that references it.
static const int max_size = 200;

// One posting: the number of a document that contains a word, plus a link to
// the next posting of the same word.
struct Word_Doc {
	int text_number = 0;
	Word_Doc *next = nullptr;
};

// One entry of the raw term table filled while the documents are scanned:
// a word and the posting node recording the document it was seen in.
struct Word_item {
	string word;
	Word_Doc *head_docID = nullptr;
};

// One node of the final index list: the word, the number of documents that
// contain it, the head of its posting chain and the next word in the list.
struct Index_list {
	string word;
	int freq = 0;
	Word_Doc *head_docID = nullptr;
	Index_list* next = nullptr;
};

// Analyses .lrc lyric files and builds an inverted index from them.
//
// Ownership: the object owns the term table (Max_size slots), the single
// Word_Doc node referenced by each table slot, and every node of the index
// list. Index_list::head_docID only borrows posting nodes from the table.
class Lyric_Index_Analysis {
private:
	Word_item* lyrics_head;  // term table, Max_size slots
	Index_list* L_H_List;    // head of the final index list (never null while alive)
public:
	// Upper and lower bound of the term table slice in use; they change as
	// documents are processed.
	int bound = 0, upper = 0;

	Lyric_Index_Analysis() {
		lyrics_head = new Word_item[Max_size];
		L_H_List = new Index_list;  // the list always has a head node
	}

	// The object owns raw memory, so copying it would lead to double frees.
	Lyric_Index_Analysis(const Lyric_Index_Analysis&) = delete;
	Lyric_Index_Analysis& operator=(const Lyric_Index_Analysis&) = delete;

	~Lyric_Index_Analysis() {
		// Every table slot owns at most one posting node; the chains built by
		// Analys_Setup_index only link those same nodes together, so deleting
		// one node per slot releases each posting exactly once.
		for (int i = 0; i < Max_size; ++i) {
			delete lyrics_head[i].head_docID;
		}
		delete[] lyrics_head;

		// Walk the index list, remembering the successor before each delete.
		Index_list* node = L_H_List;
		while (node != nullptr) {
			Index_list* following = node->next;
			delete node;
			node = following;
		}
	}

	// Returns the term table (Max_size slots, still owned by this object).
	Word_item* Return_lyrics_head() {
		return lyrics_head;
	}

	// Returns the head node of the index list (still owned by this object).
	Index_list* Return_L_H_List() {
		return L_H_List;
	}

	// Reads the lyric file `filename` and inserts its words into the term
	// table via insert_Word_List. Parameters: file name, upper bound of the
	// table (updated), lower bound of the table, document number.
	void Lyrics_input(string filename, int& upper, int bound, int number);

	// Inserts a word into the term table unless it is already present in
	// [bound, upper), i.e. already seen in the current document. Parameters:
	// term table, upper bound (updated), lower bound, word, document number.
	bool insert_Word_List(Word_item lyrics_head[], int& upper, int bound, char* elem, int position);

	// Merge-sorts lyrics_head[left..right] by word, using temp as scratch space.
	void Lyric_mergesort(Word_item lyrics_head[], Word_item temp[], int left, int right);

	// Builds the final index list from the first n (sorted) table entries.
	void  Analys_Setup_index(Word_item lyrics_head[], int n);

	// Builds the index from the first n table entries and writes it to
	// "Lyrics_Index_List.txt" (one "word#freq@doc,doc,..." line per word),
	// echoing a readable form of every entry to stdout.
	//
	// The index list stays intact afterwards, so Return_L_H_List() remains
	// usable and the destructor can release it.
	//
	// Returns true when the file was written, false when it could not be
	// opened or a write failed. For n <= 0 an empty file is produced.
	bool Lyricsindex_out(int n) {
		if (n <= 0) {
			// Nothing to index: do not build from an empty table, just
			// leave an empty output file behind.
			ofstream empty_out("Lyrics_Index_List.txt", ios::trunc);
			return empty_out.is_open();
		}

		Analys_Setup_index(lyrics_head, n);

		ofstream fout("Lyrics_Index_List.txt", ios::trunc);
		if (!fout.is_open()) {
			return false;
		}
		fout.setf(ios::left);

		// Iterate with a local cursor; the member must keep pointing at the head.
		for (const Index_list* entry = L_H_List; entry != nullptr; entry = entry->next) {
			fout << entry->word << "#" << entry->freq << "@";
			cout << entry->word << " 出现在";

			// Comma-separated document numbers, no trailing comma.
			const char* separator = "";
			for (const Word_Doc* doc = entry->head_docID; doc != nullptr; doc = doc->next) {
				cout << separator << doc->text_number;
				fout << separator << doc->text_number;
				separator = ",";
			}
			fout << endl;
			cout << "  号文档,频率为 " << entry->freq << endl;
		}

		fout.close();
		return !fout.fail();
	}

	// Prints the first n table entries with the document each one came from.
	// Every one of those entries must have a posting node attached.
	void print(int n) {
		for (int i = 0; i < n; i++) {
			cout << lyrics_head[i].word << " 出现在" << lyrics_head[i].head_docID->text_number << " 号文档" << endl;
		}
	}
};

#endif  // ANALYSIS_LYRICS_H

  再然后是类的具体的.cpp文件:

#include<fstream>#include<string>#include<iomanip>#include"Analysis_Lyrics.h"//打开文件输入歌词,参数分别为:文件名,词项表的上界,词项表的下界,文档的编号。调用insert_Word_List(Word_item lyrics_head[], int &upper,int bound, char* elem,int position),最终返回词项表void Lyric_Index_Analysis::Lyrics_input(string filename, int& upper, int bound, int number) {	//lyrics_head = new Word_item[Max_size];	ifstream fin(filename);	if (!fin.is_open()) {		cout << "文件读取失败!\n";		exit(0);	}	string str;	getline(fin, str);	//遍历整个文档,每次读取一行,然后进行分析	do {		cout << str << endl;		char c[max_size] = { ‘\0‘ };		int i = 0, ic = 0;		for (i = 0; str[i] != ‘]‘; i++);		for (int j = i + 1; str[j] != ‘\r‘&&str[j] != ‘\n‘&&str[j] != ‘\0‘; j++) {			//去掉引号后面的字符,但是如果是t的话就不去			if ((int)str[j] == 39) {				while (str[j] != ‘ ‘&&str[j] != ‘\r‘&&str[j] != ‘\n‘&&str[j] != ‘\0‘) {					j++;					if (str[j] == ‘t‘) {						j--;						break;					}				}				if (str[j] == ‘\r‘ || str[j] == ‘\n‘ || str[j] == ‘\0‘)					break;			}			//除去大小写			if (((int)str[j] >= 65) && ((int)str[j] <= 91))				c[ic++] = (int)str[j] + 32;			else				c[ic++] = str[j];			//cout << "daxiao--------------->" << (int)str[j] << endl;		}		//	cout <<"分割后的字符串:  "<< c << endl;		const char *d = "[] -;,:/?!.()";//以这些字符为分界符		char *p = NULL;		char *next_p = NULL;		p = strtok_s(c, d, &next_p);		while (p)		{			insert_Word_List(lyrics_head, upper, bound, p, number);			//	cout <<"上标是"<<upper<< "分出来了:" << lyrics_head[upper-1].word << endl;			p = strtok_s(NULL, d, &next_p);		}		getline(fin, str);	} while (!fin.eof());	fin.close();};//将单词插入词项表,如果不在就插入,在则直接退出(仅限于当前的文档),参数为:词项表,上界,下界,单词,文档的编号bool Lyric_Index_Analysis::insert_Word_List(Word_item lyrics_head[], int& upper, int bound, char* elem, int position) {	for (int i = bound; i<upper; i++) {		if (lyrics_head[i].word == elem)			return false;	}	lyrics_head[upper].head_docID = new Word_Doc;	lyrics_head[upper].head_docID->text_number = position;	lyrics_head[upper].head_docID->next = nullptr;	lyrics_head[upper].word = elem;	//cout << "分出来了(后面):" << 
lyrics_head[upper].word << endl;	upper++;	return true;};//对词项的表进行归并排序void Lyric_Index_Analysis::Lyric_mergesort(Word_item lyrics_head[], Word_item temp[], int left, int right) {	int i, j, k, mid = (left + right) / 2;	if (left == right)		return;	Lyric_mergesort(lyrics_head, temp, left, mid);	Lyric_mergesort(lyrics_head, temp, mid + 1, right);	for (i = mid; i >= left; i--)		temp[i] = lyrics_head[i];	for (j = 1; j <= right - mid; j++)		temp[right - j + 1] = lyrics_head[j + mid];	for (i = left, j = right, k = left; k <= right; k++)		if (temp[i].word <= temp[j].word)			lyrics_head[k] = temp[i++];		else			lyrics_head[k] = temp[j--];};//对词项建立最终的索引表void Lyric_Index_Analysis::Analys_Setup_index(Word_item lyrics_head[], int n) {	Word_Doc *temp;//用于每个词项出现在文档中的位置的索引	Index_list* t_L_H_List = L_H_List;	t_L_H_List->word = lyrics_head[0].word;	t_L_H_List->freq = 1;	t_L_H_List->head_docID = lyrics_head[0].head_docID;	t_L_H_List->next = nullptr;	temp = t_L_H_List->head_docID;	cout << "单词是" << t_L_H_List->word << " 出现在 " << temp->text_number << " 号文档,此时频率是" << t_L_H_List->freq << endl;	for (int i = 1; i < n; i++) {		while (lyrics_head[i - 1].word == lyrics_head[i].word) {			temp->next = lyrics_head[i].head_docID;			temp = temp->next;			cout << "        词项和上一个相等,出现在" << temp->text_number << " 号文档,此时频率是" << t_L_H_List->freq + 1 << endl;			t_L_H_List->freq++;			++i;		}		if (i == n)			break;		Index_list* temp_L_H_List = new Index_list;		temp_L_H_List->word = lyrics_head[i].word;		temp_L_H_List->freq = 1;		temp_L_H_List->head_docID = lyrics_head[i].head_docID;		temp = temp_L_H_List->head_docID;		temp_L_H_List->next = nullptr;		t_L_H_List->next = temp_L_H_List;		t_L_H_List = t_L_H_List->next;		cout << "单词是" << t_L_H_List->word << " 出现在 " << temp->text_number << " 号文档,此时频率是" << t_L_H_List->freq << endl;	}};

  

c++下lrc歌词文件检索(自己写的检索歌词文件,记录点滴)