全文检索之lucene的优化篇--创建索引库

首页 > 代码库 > 全文检索之lucene的优化篇--创建索引库

全文检索之lucene的优化篇--创建索引库

2024-10-19 16:14:01 209人阅读

在上一篇HelloWorld的基础上,建立一个directory的包,添加一个DirectoryTest的测试类,用来根据指定的索引路径创建存放索引的目录.

技术分享

DirectoryTest类中的代码如下,基本上就是在HelloWorld的基础上改改就可以了.

里面一共三个方法: testDirectory(),测试创建索引库; testDirectoryFSAndRAM(),结合方法1中的两种创建方式(文件系统和内存)进行优化; testDirectoryOptimize(),在方法2的基础上,研究索引的优化创建,减少创建的索引文件数.

package com.lucene.directory;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;

import com.lucene.units.File2DocumentUtils;

/**
 * Exercises different ways of creating a Lucene index library:
 * directly on the file system, via an in-memory directory that is
 * saved to the file system afterwards, and with an explicit optimize step.
 *
 * <p>Every {@link IndexWriter} and {@link Directory} opened by a test is
 * closed in a {@code finally} block so that a failure while indexing does
 * not leave the writer (and the resources it holds) open.
 *
 * @author liu
 */
public class DirectoryTest {

	// Path of the source file that is converted into a Document and indexed
	String filePath = "F:\\Users\\liuyanling\\workspace\\LuceneDemo\\datasource\\peoplewhocannot.txt";
	// Directory in which the index files are stored
	String indexPath = "F:\\Users\\liuyanling\\workspace\\LuceneDemo\\luceneIndex";
	// Analyzer used for tokenization: the standard analyzer
	Analyzer analyzer = new StandardAnalyzer();

	/**
	 * Creates the index library directly on the file system.
	 *
	 * <p>A file-system index is kept permanently but is slower to operate on.
	 * The alternative, {@code new RAMDirectory()}, is fast but its data is
	 * gone once the program exits. The two can be combined: work in memory
	 * and save on exit (see {@link #testDirectoryFSAndRAM()}).
	 *
	 * @throws Exception if the source file cannot be converted or indexing fails
	 */
	@Test
	public void testDirectory() throws Exception {
		Directory dir = FSDirectory.getDirectory(indexPath);
		try {
			Document doc = File2DocumentUtils.file2Document(filePath);
			// create == true: rewrite any index already present in the directory
			IndexWriter indexWriter = new IndexWriter(dir, analyzer, true, MaxFieldLength.LIMITED);
			try {
				indexWriter.addDocument(doc);
			} finally {
				// Close even when addDocument fails so the writer is not leaked
				indexWriter.close();
			}
		} finally {
			dir.close();
		}
	}

	/**
	 * Loads the file-system index into memory on start-up, indexes into the
	 * in-memory copy, and writes the result back to the file system on exit.
	 *
	 * @throws Exception if the source file cannot be converted or indexing fails
	 */
	@Test
	public void testDirectoryFSAndRAM() throws Exception {
		// Index library on the file system
		Directory fsDir = FSDirectory.getDirectory(indexPath);
		try {
			// 1. On start-up: build the in-memory index library from the file-system one
			Directory ramDir = new RAMDirectory(fsDir);
			try {
				// While running, all indexing goes through the in-memory writer
				IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);
				try {
					Document doc = File2DocumentUtils.file2Document(filePath);
					ramIndexWriter.addDocument(doc);
				} finally {
					ramIndexWriter.close();
				}

				// 2. On exit: save the in-memory index to the file system.
				// create == true rewrites the index files in the target directory;
				// false would append to the index files that already exist there.
				IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);
				try {
					// Merge the in-memory index into the file-system index without optimizing
					fsIndexWriter.addIndexesNoOptimize(new Directory[] { ramDir });
				} finally {
					fsIndexWriter.close();
				}
			} finally {
				ramDir.close();
			}
		} finally {
			fsDir.close();
		}
	}

	/**
	 * Optimizes the existing file-system index, merging index files so that
	 * fewer index files remain.
	 *
	 * @throws Exception if the index cannot be opened or optimized
	 */
	@Test
	public void testDirectoryOptimize() throws Exception {
		// Index library on the file system
		Directory fsDir = FSDirectory.getDirectory(indexPath);
		try {
			IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, MaxFieldLength.LIMITED);
			try {
				fsIndexWriter.optimize();
			} finally {
				fsIndexWriter.close();
			}
		} finally {
			fsDir.close();
		}
	}

}

查看运行效果,

1.先测试testDirectory()的Directory dir =FSDirectory.getDirectory(indexPath); 为体现效果,先删除现有的索引文件.

技术分享